From cba9c7b7d27b59edf49979c746e480dbce787bc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Vincent-Cuaz?= <cedvincentcuaz@gmail.com>
Date: Thu, 30 May 2024 01:09:22 +0200
Subject: [PATCH] [WIP] quantized gromov wasserstein solver (#603)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* first commit : quantized gromov wasserstein solver

* start setting up tests

* fix build OT for all backends - nb: concatenation procedure is less efficient for numpy and torch

* dealing with edge cases

* fix pep8

* updates + start setting exemple

* updates + start setting exemple

* updating code + exemple + test + docs

* fix sklearn imports

* fix

* setting up new API for qGW

* fix pep8

* tests

* update qFGW plots

* update qFGW plots

* up tests

* update example

* merge master

* complete tests

---------

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
---
 CONTRIBUTORS.md                               |    2 +-
 README.md                                     |    5 +-
 RELEASES.md                                   |    1 +
 .../plot_quantized_gromov_wasserstein.py      |  515 ++++++++
 ot/gromov/__init__.py                         |   16 +-
 ot/gromov/_quantized.py                       | 1147 +++++++++++++++++
 test/gromov/test_quantized.py                 |  377 ++++++
 7 files changed, 2060 insertions(+), 3 deletions(-)
 create mode 100644 examples/gromov/plot_quantized_gromov_wasserstein.py
 create mode 100644 ot/gromov/_quantized.py
 create mode 100644 test/gromov/test_quantized.py

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index c185e18a7..e982cd5b6 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -41,7 +41,7 @@ The contributors to this library are:
 * [Tanguy Kerdoncuff](https://hv0nnus.github.io/) (Sampled Gromov Wasserstein)
 * [Minhui Huang](https://mhhuang95.github.io) (Projection Robust Wasserstein Distance)
 * [Nathan Cassereau](https://github.com/ncassereau-idris) (Backends)
-* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning, FGW, semi-relaxed FGW)
+* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning, FGW, semi-relaxed FGW, quantized FGW)
 * [Eloi Tanguy](https://github.com/eloitanguy) (Generalized Wasserstein Barycenters)
 * [Camille Le Coz](https://www.linkedin.com/in/camille-le-coz-8593b91a1/) (EMD2 debug)
 * [Eduardo Fernandes Montesuma](https://eddardd.github.io/my-personal-blog/) (Free support sinkhorn barycenter)
diff --git a/README.md b/README.md
index f1149a008..1cd9fb59b 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,7 @@ POT provides the following generic OT solvers (links to examples):
 * [Spherical Sliced Wasserstein](https://pythonot.github.io/auto_examples/sliced-wasserstein/plot_variance_ssw.html) [46]
 * [Graph Dictionary Learning solvers](https://pythonot.github.io/auto_examples/gromov/plot_gromov_wasserstein_dictionary_learning.html) [38].
 * [Semi-relaxed (Fused) Gromov-Wasserstein divergences](https://pythonot.github.io/auto_examples/gromov/plot_semirelaxed_fgw.html) (exact and regularized [48]).
+* [Quantized (Fused) Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_quantized_gromov_wasserstein.html) [68].
 * [Efficient Discrete Multi Marginal Optimal Transport Regularization](https://pythonot.github.io/auto_examples/others/plot_demd_gradient_minimize.html) [50].
 * [Several backends](https://pythonot.github.io/quickstart.html#solving-ot-with-multiple-backends) for easy use of POT with  [Pytorch](https://pytorch.org/)/[jax](https://github.com/google/jax)/[Numpy](https://numpy.org/)/[Cupy](https://cupy.dev/)/[Tensorflow](https://www.tensorflow.org/) arrays.
 * Smooth Strongly Convex Nearest Brenier Potentials [58], with an extension to bounding potentials using [59].
@@ -358,4 +359,6 @@ distances between Gaussian distributions](https://hal.science/hal-03197398v2/fil
 
 [66] Pooladian, Aram-Alexandre, and Jonathan Niles-Weed. [Entropic estimation of optimal transport maps](https://arxiv.org/pdf/2109.12004.pdf). arXiv preprint arXiv:2109.12004 (2021).
 
-[67] Scetbon, M., Peyré, G. & Cuturi, M. (2022). [Linear-Time GromovWasserstein Distances using Low Rank Couplings and Costs](https://proceedings.mlr.press/v162/scetbon22b/scetbon22b.pdf). In International Conference on Machine Learning (ICML), 2022.
\ No newline at end of file
+[67] Scetbon, M., Peyré, G. & Cuturi, M. (2022). [Linear-Time Gromov-Wasserstein Distances using Low Rank Couplings and Costs](https://proceedings.mlr.press/v162/scetbon22b/scetbon22b.pdf). In International Conference on Machine Learning (ICML), 2022.
+
+[68] Chowdhury, S., Miller, D., & Needham, T. (2021). [Quantized Gromov-Wasserstein](https://link.springer.com/chapter/10.1007/978-3-030-86523-8_49). ECML PKDD 2021. Springer International Publishing.
diff --git a/RELEASES.md b/RELEASES.md
index c31081451..51075c973 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -3,6 +3,7 @@
 ## 0.9.4dev
 
 #### New features
++ New quantized FGW solvers `ot.gromov.quantized_fused_gromov_wasserstein`, `ot.gromov.quantized_fused_gromov_wasserstein_samples` and `ot.gromov.quantized_fused_gromov_wasserstein_partitioned` (PR #603)
 + `ot.gromov._gw.solve_gromov_linesearch` now has an argument to specify if the matrices are symmetric in which case the computation can be done faster (PR #607).
 + Continuous entropic mapping (PR #613)
 + New general unbalanced solvers for `ot.solve` and BFGS solver and illustrative example (PR #620)
diff --git a/examples/gromov/plot_quantized_gromov_wasserstein.py b/examples/gromov/plot_quantized_gromov_wasserstein.py
new file mode 100644
index 000000000..02d777c71
--- /dev/null
+++ b/examples/gromov/plot_quantized_gromov_wasserstein.py
@@ -0,0 +1,515 @@
+# -*- coding: utf-8 -*-
+"""
+===============================================
+Quantized Fused Gromov-Wasserstein examples
+===============================================
+
+These examples show how to use the quantized (Fused) Gromov-Wasserstein
+solvers (qFGW) [68]. POT provides a generic solver `quantized_fused_gromov_wasserstein_partitioned`
+that takes as inputs partitioned graphs, potentially endowed with node features,
+which have to be built by the user. On top of that, POT provides two wrappers:
+    i) `quantized_fused_gromov_wasserstein` operating over generic graphs, whose
+    partitioning is performed via `get_graph_partition` using e.g. the Louvain algorithm,
+    and where a representant of each partition can be selected via `get_graph_representants`
+    using e.g. the PageRank algorithm.
+
+    ii) `quantized_fused_gromov_wasserstein_samples` operating over point clouds,
+    e.g. :math:`X_1 \in R^{n_1 * d_1}` and :math:`X_2 \in R^{n_2 * d_2}`
+    endowed with their respective euclidean geometry, whose partitioning and
+    representant selection are performed jointly using e.g. the K-means algorithm
+    via the function `get_partition_and_representants_samples`.
+
+
+We illustrate next how to compute the qGW distance on both types of data by:
+
+    i) Generating two graphs following Stochastic Block Models encoded as shortest
+    path matrices, as qGW solvers tend to require dense structures to achieve a good
+    approximation of the GW distance (as qGW is an upper-bound of GW). In the meantime,
+    we illustrate an optional feature of our solvers, namely the use of auxiliary
+    structures, e.g. adjacency matrices, to perform the graph partitioning.
+
+    ii) Generating two point clouds representing curves in 2D and 3D respectively.
+    We augment these point clouds by considering additional features of the same
+    dimensionality :math:`F_1 \in R^{n_1 * d}` and :math:`F_2 \in R^{n_2 * d}`,
+    representing the color intensity associated to each sample of both distributions.
+    Then we compute the qFGW distance between these attributed point clouds.
+
+
+[68] Chowdhury, S., Miller, D., & Needham, T. (2021). Quantized Gromov-Wasserstein.
+ECML PKDD 2021. Springer International Publishing.
+"""
+
+# Author: Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
+#
+# License: MIT License
+
+# sphinx_gallery_thumbnail_number = 2
+
+import numpy as np
+import matplotlib.pylab as pl
+import matplotlib.pyplot as plt
+import networkx
+from networkx.generators.community import stochastic_block_model as sbm
+from scipy.sparse.csgraph import shortest_path
+
+from ot.gromov import (
+    quantized_fused_gromov_wasserstein_partitioned, quantized_fused_gromov_wasserstein,
+    get_graph_partition, get_graph_representants, format_partitioned_graph,
+    quantized_fused_gromov_wasserstein_samples,
+    get_partition_and_representants_samples)
+
+#############################################################################
+#
+# Generate graphs
+# --------------------------------------------------------------------------
+#
+# Create two graphs following Stochastic Block models of 2 and 3 clusters.
+
+N1 = 30  # 2 communities
+N2 = 45  # 3 communities
+p1 = [[0.8, 0.1],
+      [0.1, 0.7]]
+p2 = [[0.8, 0.1, 0.],
+      [0.1, 0.75, 0.1],
+      [0., 0.1, 0.7]]
+G1 = sbm(seed=0, sizes=[N1 // 2, N1 // 2], p=p1)
+G2 = sbm(seed=0, sizes=[N2 // 3, N2 // 3, N2 // 3], p=p2)
+
+
+C1 = networkx.to_numpy_array(G1)
+C2 = networkx.to_numpy_array(G2)
+
+spC1 = shortest_path(C1)
+spC2 = shortest_path(C2)
+
+h1 = np.ones(C1.shape[0]) / C1.shape[0]
+h2 = np.ones(C2.shape[0]) / C2.shape[0]
+
+# Add weights on the edges for visualization later on
+weight_intra_G1 = 5
+weight_inter_G1 = 0.5
+weight_intra_G2 = 1.
+weight_inter_G2 = 1.5
+
+weightedG1 = networkx.Graph()
+part_G1 = [G1.nodes[i]['block'] for i in range(N1)]
+
+for node in G1.nodes():
+    weightedG1.add_node(node)
+for i, j in G1.edges():
+    if part_G1[i] == part_G1[j]:
+        weightedG1.add_edge(i, j, weight=weight_intra_G1)
+    else:
+        weightedG1.add_edge(i, j, weight=weight_inter_G1)
+
+weightedG2 = networkx.Graph()
+part_G2 = [G2.nodes[i]['block'] for i in range(N2)]
+
+for node in G2.nodes():
+    weightedG2.add_node(node)
+for i, j in G2.edges():
+    if part_G2[i] == part_G2[j]:
+        weightedG2.add_edge(i, j, weight=weight_intra_G2)
+    else:
+        weightedG2.add_edge(i, j, weight=weight_inter_G2)
+
+
+# setup for graph visualization
+
+def node_coloring(part, starting_color=0):
+
+    # get graphs partition and their coloring
+    unique_colors = ['C%s' % (starting_color + i) for i in np.unique(part)]
+    nodes_color_part = []
+    for cluster in part:
+        nodes_color_part.append(unique_colors[cluster])
+
+    return nodes_color_part
+
+
+def draw_graph(G, C, nodes_color_part, rep_indices, node_alphas=None, pos=None,
+               edge_color='black', alpha_edge=0.7, node_size=None,
+               shiftx=0, seed=0, highlight_rep=False):
+
+    if (pos is None):
+        pos = networkx.spring_layout(G, scale=1., seed=seed)
+
+    if shiftx != 0:
+        for k, v in pos.items():
+            v[0] = v[0] + shiftx
+
+    width_edge = 1.5
+
+    if not highlight_rep:
+        networkx.draw_networkx_edges(
+            G, pos, width=width_edge, alpha=alpha_edge, edge_color=edge_color)
+    else:
+        for edge in G.edges:
+            if (edge[0] in rep_indices) and (edge[1] in rep_indices):
+                networkx.draw_networkx_edges(
+                    G, pos, edgelist=[edge], width=width_edge, alpha=alpha_edge,
+                    edge_color=edge_color)
+            else:
+                networkx.draw_networkx_edges(
+                    G, pos, edgelist=[edge], width=width_edge, alpha=0.2,
+                    edge_color=edge_color)
+
+    for node, node_color in enumerate(nodes_color_part):
+        local_node_shape, local_node_size = 'o', node_size
+
+        if highlight_rep:
+            if node in rep_indices:
+                local_node_shape, local_node_size = '*', 6 * node_size
+
+        if node_alphas is None:
+            alpha = 0.9
+            if highlight_rep:
+                alpha = 0.9 if node in rep_indices else 0.1
+
+        else:
+            alpha = node_alphas[node]
+
+        networkx.draw_networkx_nodes(G, pos, nodelist=[node], alpha=alpha,
+                                     node_shape=local_node_shape,
+                                     node_size=local_node_size,
+                                     node_color=node_color)
+
+    return pos
+
+
+#############################################################################
+#
+# Compute their quantized Gromov-Wasserstein distance without using the wrapper
+# --------------------------------------------------------------------------------
+#
+# We detail next the steps implemented within the wrapper that preprocess graphs
+# to form partitioned graphs, which are then passed as input to the generic qFGW solver.
+
+# 1-a) Partition C1 and C2 in 2 and 3 clusters respectively using the Louvain
+#    algorithm from NetworkX. Then encode these partitions via vectors of assignments.
+
+part_method = 'louvain'
+rep_method = 'pagerank'
+
+npart_1 = 2  # 2 clusters used to describe C1
+npart_2 = 3  # 3 clusters used to describe C2
+
+part1 = get_graph_partition(
+    C1, npart=npart_1, part_method=part_method, F=None, alpha=1.)
+part2 = get_graph_partition(
+    C2, npart=npart_2, part_method=part_method, F=None, alpha=1.)
+
+# 1-b) Select a representant in each partition using the PageRank algorithm
+#     implementation from NetworkX.
+
+rep_indices1 = get_graph_representants(C1, part1, rep_method=rep_method)
+rep_indices2 = get_graph_representants(C2, part2, rep_method=rep_method)
+
+# 1-c) Format partitions such that:
+# CR contains relations between representants in each space.
+# list_R contains relations between samples and representants within each partition.
+# list_h contains the relative importance of samples within each partition.
+
+CR1, list_R1, list_h1 = format_partitioned_graph(
+    spC1, h1, part1, rep_indices1, F=None, M=None, alpha=1.)
+
+CR2, list_R2, list_h2 = format_partitioned_graph(
+    spC2, h2, part2, rep_indices2, F=None, M=None, alpha=1.)
+
+# 1-d) Call the partitioned quantized Gromov-Wasserstein solver
+
+OT_global_, OTs_local_, OT_, log_ = quantized_fused_gromov_wasserstein_partitioned(
+    CR1, CR2, list_R1, list_R2, list_h1, list_h2, MR=None,
+    alpha=1., build_OT=True, log=True)
+
+
+# Visualization of the graph pre-processing
+
+node_size = 40
+fontsize = 10
+seed_G1 = 0
+seed_G2 = 3
+
+part1_ = part1.astype(np.int32)
+part2_ = part2.astype(np.int32)
+
+
+nodes_color_part1 = node_coloring(part1_, starting_color=0)
+nodes_color_part2 = node_coloring(part2_, starting_color=np.unique(nodes_color_part1).shape[0])
+
+
+pl.figure(1, figsize=(6, 5))
+pl.clf()
+pl.axis('off')
+pl.subplot(2, 3, 1)
+pl.title(r'Input graph: $\mathbf{spC_1}$', fontsize=fontsize)
+
+pos1 = draw_graph(
+    G1, C1, ['C0' for _ in part1_], rep_indices1, node_size=node_size, seed=seed_G1)
+
+pl.subplot(2, 3, 2)
+pl.title('Partitioning', fontsize=fontsize)
+
+_ = draw_graph(
+    G1, C1, nodes_color_part1, rep_indices1, pos=pos1, node_size=node_size, seed=seed_G1)
+
+pl.subplot(2, 3, 3)
+pl.title('Representant selection', fontsize=fontsize)
+
+_ = draw_graph(
+    G1, C1, nodes_color_part1, rep_indices1, pos=pos1, node_size=node_size,
+    seed=seed_G1, highlight_rep=True)
+
+pl.subplot(2, 3, 4)
+pl.title(r'Input graph: $\mathbf{spC_2}$', fontsize=fontsize)
+
+pos2 = draw_graph(
+    G2, C2, ['C0' for _ in part2_], rep_indices2, node_size=node_size, seed=seed_G2)
+
+pl.subplot(2, 3, 5)
+pl.title(r'Partitioning', fontsize=fontsize)
+
+_ = draw_graph(
+    G2, C2, nodes_color_part2, rep_indices2, pos=pos2, node_size=node_size, seed=seed_G2)
+
+pl.subplot(2, 3, 6)
+pl.title(r'Representant selection', fontsize=fontsize)
+
+_ = draw_graph(
+    G2, C2, nodes_color_part2, rep_indices2, pos=pos2, node_size=node_size,
+    seed=seed_G2, highlight_rep=True)
+pl.tight_layout()
+
+#############################################################################
+#
+# Compute the quantized Gromov-Wasserstein distance using the wrapper
+# --------------------------------------------------------------------------------
+#
+# Compute qGW(spC1, h1, spC2, h2). We also illustrate the use of auxiliary matrices,
+# such as the adjacency matrices `C1_aux=C1` and `C2_aux=C2`, to partition the graphs using
+# the Louvain algorithm, and the PageRank algorithm for selecting a representant within
+# each partition. Notice that `C1_aux` and `C2_aux` are optional: if they are not
+# specified, these pre-processing algorithms will be applied to spC1 and spC2.
+
+
+# no node features are considered on this synthetic dataset. Hence we simply
+# let F1, F2 = None and set alpha = 1.
+OT_global, OTs_local, OT, log = quantized_fused_gromov_wasserstein(
+    spC1, spC2, npart_1, npart_2, h1, h2, C1_aux=C1, C2_aux=C2, F1=None, F2=None,
+    alpha=1., part_method=part_method, rep_method=rep_method, log=True)
+
+qGW_dist = log['qFGW_dist']
+
+
+#############################################################################
+#
+# Visualization of the quantized Gromov-Wasserstein matching
+# --------------------------------------------------------------
+#
+# We color nodes of the graph based on the respective partition of each graph.
+# On the first plot we illustrate the qGW matching between both shortest path matrices,
+# while the GW matching across representants of each space is illustrated on the right.
+
+
+def draw_transp_colored_qGW(
+        G1, C1, G2, C2, part1, part2, rep_indices1, rep_indices2, T,
+        pos1=None, pos2=None, shiftx=4, switchx=False, node_size=70,
+        seed_G1=0, seed_G2=0, highlight_rep=False):
+    starting_color = 0
+    # get graphs partition and their coloring
+    unique_colors1 = ['C%s' % (starting_color + i) for i in np.unique(part1)]
+    nodes_color_part1 = []
+    for cluster in part1:
+        nodes_color_part1.append(unique_colors1[cluster])
+
+    starting_color = len(unique_colors1) + 1
+    unique_colors2 = ['C%s' % (starting_color + i) for i in np.unique(part2)]
+    nodes_color_part2 = []
+    for cluster in part2:
+        nodes_color_part2.append(unique_colors2[cluster])
+
+    pos1 = draw_graph(
+        G1, C1, nodes_color_part1, rep_indices1, pos=pos1, node_size=node_size,
+        shiftx=0, seed=seed_G1, highlight_rep=highlight_rep)
+    pos2 = draw_graph(
+        G2, C2, nodes_color_part2, rep_indices2, pos=pos2, node_size=node_size,
+        shiftx=shiftx, seed=seed_G1, highlight_rep=highlight_rep)
+
+    if not highlight_rep:
+        for k1, v1 in pos1.items():
+            max_Tk1 = np.max(T[k1, :])
+            for k2, v2 in pos2.items():
+                if (T[k1, k2] > 0):
+                    pl.plot([pos1[k1][0], pos2[k2][0]],
+                            [pos1[k1][1], pos2[k2][1]],
+                            '-', lw=0.7, alpha=T[k1, k2] / max_Tk1,
+                            color=nodes_color_part1[k1])
+
+    else:  # OT is only between representants
+        for id1, node_id1 in enumerate(rep_indices1):
+            max_Tk1 = np.max(T[id1, :])
+            for id2, node_id2 in enumerate(rep_indices2):
+                if (T[id1, id2] > 0):
+                    pl.plot([pos1[node_id1][0], pos2[node_id2][0]],
+                            [pos1[node_id1][1], pos2[node_id2][1]],
+                            '-', lw=0.8, alpha=T[id1, id2] / max_Tk1,
+                            color=nodes_color_part1[node_id1])
+    return pos1, pos2
+
+
+pl.figure(2, figsize=(5, 2.5))
+pl.clf()
+pl.axis('off')
+pl.subplot(1, 2, 1)
+pl.title(r'qGW$(\mathbf{spC_1}, \mathbf{spC_1}) =%s$' % (np.round(qGW_dist, 3)), fontsize=fontsize)
+
+pos1, pos2 = draw_transp_colored_qGW(
+    weightedG1, C1, weightedG2, C2, part1_, part2_, rep_indices1, rep_indices2,
+    T=OT_, shiftx=1.5, node_size=node_size, seed_G1=seed_G1, seed_G2=seed_G2)
+
+pl.tight_layout()
+
+pl.subplot(1, 2, 2)
+pl.title(r' GW$(\mathbf{CR_1}, \mathbf{CR_2}) =%s$' % (np.round(log_['global dist'], 3)), fontsize=fontsize)
+
+pos1, pos2 = draw_transp_colored_qGW(
+    weightedG1, C1, weightedG2, C2, part1_, part2_, rep_indices1, rep_indices2,
+    T=OT_global, shiftx=1.5, node_size=node_size, seed_G1=seed_G1, seed_G2=seed_G2,
+    highlight_rep=True)
+
+pl.tight_layout()
+pl.show()
+
+#############################################################################
+#
+# Generate attributed point clouds
+# --------------------------------------------------------------------------
+#
+# Create two attributed point clouds representing curves in 2D and 3D respectively,
+# whose samples are further associated to various color intensities.
+
+n_samples = 100
+
+# Generate 2D and 3D curves
+theta = np.linspace(-4 * np.pi, 4 * np.pi, n_samples)
+z = np.linspace(1, 2, n_samples)
+r = z**2 + 1
+x = r * np.sin(theta)
+y = r * np.cos(theta)
+
+# Source and target distribution across spaces encoded respectively via their
+# squared euclidean distance matrices.
+
+X = np.concatenate([x.reshape(-1, 1), z.reshape(-1, 1)], axis=1)
+Y = np.concatenate([x.reshape(-1, 1), y.reshape(-1, 1), z.reshape(-1, 1)], axis=1)
+
+# Further associated to color intensity features derived from z
+
+FX = z - z.min() / (z.max() - z.min())
+FX = np.clip(0.8 * FX + 0.2, a_min=0.2, a_max=1.)  # for numerical issues
+FY = FX
+
+
+#############################################################################
+#
+# Visualize partitioned attributed point clouds
+# --------------------------------------------------------------------------
+#
+# Compute the partitioning and representant selection further used within
+# qFGW wrapper, both provided by a K-means algorithm. Then visualize partitioned spaces.
+
+part1, rep_indices1 = get_partition_and_representants_samples(
+    X, 4, 'kmeans', 0)
+part2, rep_indices2 = get_partition_and_representants_samples(
+    Y, 4, 'kmeans', 0)
+
+upart1 = np.unique(part1)
+upart2 = np.unique(part2)
+
+# Plot the source and target samples as distributions
+s = 20
+fig = plt.figure(3, figsize=(6, 3))
+
+ax1 = fig.add_subplot(1, 3, 1)
+ax1.set_title("2D curve")
+ax1.scatter(X[:, 0], X[:, 1], color="C0", alpha=FX, s=s)
+plt.axis('off')
+
+
+ax2 = fig.add_subplot(1, 3, 2)
+ax2.set_title("Partitioning")
+for i, elem in enumerate(upart1):
+    idx = np.argwhere(part1 == elem)[:, 0]
+    ax2.scatter(X[idx, 0], X[idx, 1], color="C%s" % i, alpha=FX[idx], s=s)
+plt.axis('off')
+
+ax3 = fig.add_subplot(1, 3, 3)
+ax3.set_title("Representant selection")
+for i, elem in enumerate(upart1):
+    idx = np.argwhere(part1 == elem)[:, 0]
+    ax3.scatter(X[idx, 0], X[idx, 1], color="C%s" % i, alpha=FX[idx], s=10)
+    rep_idx = rep_indices1[i]
+    ax3.scatter([X[rep_idx, 0]], [X[rep_idx, 1]], color="C%s" % i, alpha=1, s=6 * s, marker='*')
+plt.axis('off')
+plt.tight_layout()
+plt.show()
+
+start_color = upart1.shape[0] + 1
+
+fig = plt.figure(4, figsize=(6, 5))
+
+ax4 = fig.add_subplot(1, 3, 1, projection="3d")
+ax4.set_title("3D curve")
+ax4.scatter(Y[:, 0], Y[:, 1], Y[:, 2], c='C0', alpha=FY, s=s)
+plt.axis('off')
+
+ax5 = fig.add_subplot(1, 3, 2, projection="3d")
+ax5.set_title("Partitioning")
+for i, elem in enumerate(upart2):
+    idx = np.argwhere(part2 == elem)[:, 0]
+    color = 'C%s' % (start_color + i)
+    ax5.scatter(Y[idx, 0], Y[idx, 1], Y[idx, 2], c=color, alpha=FY[idx], s=s)
+plt.axis('off')
+
+ax6 = fig.add_subplot(1, 3, 3, projection="3d")
+ax6.set_title("Representant selection")
+for i, elem in enumerate(upart2):
+    idx = np.argwhere(part2 == elem)[:, 0]
+    color = 'C%s' % (start_color + i)
+    rep_idx = rep_indices2[i]
+    ax6.scatter(Y[idx, 0], Y[idx, 1], Y[idx, 2], c=color, alpha=FY[idx], s=s)
+    ax6.scatter([Y[rep_idx, 0]], [Y[rep_idx, 1]], [Y[rep_idx, 2]], c=color, alpha=1, s=6 * s, marker='*')
+plt.axis('off')
+plt.tight_layout()
+plt.show()
+
+#############################################################################
+#
+# Compute the quantized Fused Gromov-Wasserstein distance between samples using the wrapper
+# ----------------------------------------------------------------------------------------------------
+#
+# Compute qFGW(X, FX, hX, Y, FY, hY), setting the trade-off parameter between
+# structures and features `alpha=0.5`. This solver considers a squared euclidean structure
+# for each distribution X and Y, and partitions each of them into 4 clusters using
+# the K-means algorithm before computing qFGW.
+
+T_global, Ts_local, T, log = quantized_fused_gromov_wasserstein_samples(
+    X, Y, 4, 4, p=None, q=None, F1=FX[:, None], F2=FY[:, None], alpha=0.5,
+    method='kmeans', log=True)
+
+# Plot the OT plans between the distributions and between their representants
+pl.figure(5, figsize=(6, 3))
+pl.subplot(1, 2, 1)
+pl.title('OT between distributions')
+pl.imshow(T, interpolation="nearest", aspect="auto")
+pl.colorbar()
+pl.axis('off')
+
+pl.subplot(1, 2, 2)
+pl.title('OT between representants')
+pl.imshow(T_global, interpolation="nearest", aspect="auto")
+pl.axis('off')
+pl.colorbar()
+
+pl.tight_layout()
+pl.show()
diff --git a/ot/gromov/__init__.py b/ot/gromov/__init__.py
index b33dafd32..03663dab4 100644
--- a/ot/gromov/__init__.py
+++ b/ot/gromov/__init__.py
@@ -50,6 +50,16 @@
 from ._lowrank import (_flat_product_operator, lowrank_gromov_wasserstein_samples)
 
 
+from ._quantized import (quantized_fused_gromov_wasserstein_partitioned,
+                         get_graph_partition,
+                         get_graph_representants,
+                         format_partitioned_graph,
+                         quantized_fused_gromov_wasserstein,
+                         get_partition_and_representants_samples,
+                         format_partitioned_samples,
+                         quantized_fused_gromov_wasserstein_samples
+                         )
+
 __all__ = ['init_matrix', 'tensor_product', 'gwloss', 'gwggrad', 'update_square_loss',
            'update_kl_loss', 'update_feature_matrix', 'init_matrix_semirelaxed',
            'gromov_wasserstein', 'gromov_wasserstein2', 'fused_gromov_wasserstein',
@@ -66,4 +76,8 @@
            'entropic_semirelaxed_gromov_wasserstein2', 'entropic_semirelaxed_fused_gromov_wasserstein',
            'entropic_semirelaxed_fused_gromov_wasserstein2', 'gromov_wasserstein_dictionary_learning',
            'gromov_wasserstein_linear_unmixing', 'fused_gromov_wasserstein_dictionary_learning',
-           'fused_gromov_wasserstein_linear_unmixing', 'lowrank_gromov_wasserstein_samples']
+           'fused_gromov_wasserstein_linear_unmixing', 'lowrank_gromov_wasserstein_samples',
+           'quantized_fused_gromov_wasserstein_partitioned', 'get_graph_partition',
+           'get_graph_representants', 'format_partitioned_graph',
+           'quantized_fused_gromov_wasserstein', 'get_partition_and_representants_samples',
+           'format_partitioned_samples', 'quantized_fused_gromov_wasserstein_samples']
diff --git a/ot/gromov/_quantized.py b/ot/gromov/_quantized.py
new file mode 100644
index 000000000..147f4b221
--- /dev/null
+++ b/ot/gromov/_quantized.py
@@ -0,0 +1,1147 @@
+"""
+Quantized (Fused) Gromov-Wasserstein solvers.
+"""
+
+# Author: Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+import warnings
+
+try:
+    from networkx.algorithms.community import asyn_fluidc, louvain_communities
+    from networkx import from_numpy_array, pagerank
+    networkx_import = True
+except ImportError:
+    networkx_import = False
+
+try:
+    from sklearn.cluster import SpectralClustering, KMeans
+    sklearn_import = True
+except ImportError:
+    sklearn_import = False
+
+import random
+
+from ..utils import list_to_array, unif, dist
+from ..backend import get_backend
+from ..lp import emd_1d
+from ._gw import gromov_wasserstein, fused_gromov_wasserstein
+from ._utils import init_matrix, gwloss
+
+
+def quantized_fused_gromov_wasserstein_partitioned(
+        CR1, CR2, list_R1, list_R2, list_p1, list_p2, MR=None,
+        alpha=1., build_OT=False, log=False, armijo=False, max_iter=1e4,
+        tol_rel=1e-9, tol_abs=1e-9, nx=None, **kwargs):
+    r"""
+    Returns the quantized Fused Gromov-Wasserstein transport between
+    :math:`(\mathbf{C_1}, \mathbf{F_1}, \mathbf{p})` and :math:`(\mathbf{C_2},
+    \mathbf{F_2}, \mathbf{q})`, whose samples are assigned to partitions and representants
+    :math:`\mathcal{P_1} = \{(\mathbf{P_{1, i}}, \mathbf{r_{1, i}})\}_{i \leq npart1}`
+    and :math:`\mathcal{P_2} = \{(\mathbf{P_{2, j}}, \mathbf{r_{2, j}})\}_{j \leq npart2}`.
+    The latter must be precomputed and encoded e.g for the source as: :math:`\mathbf{CR_1}`
+    structure matrix between representants; `list_R1` a list of relations between
+    representants and their associated samples; `list_p1` a list of nodes
+    distribution within each partition; :math:`\mathbf{FR_1}` feature matrix
+    of representants.
+
+    The function estimates the following optimization problem:
+
+    .. math::
+        \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \alpha \sum_{i,j,k,l}
+        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        + (1-\alpha) \langle \mathbf{T}, M\rangle_F
+        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+             \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+             \mathbf{T} &\geq 0
+
+             \mathbf{T}_{|\mathbf{P_{1, i}}, \mathbf{P_{2, j}}} &= T^{g}_{ij} \mathbf{T}^{(i,j)}
+
+    using a two-step strategy computing: i) a global alignment :math:`\mathbf{T}^{g}`
+    between representants joint structure and feature spaces; ii) local alignments
+    :math:`\mathbf{T}^{(i, j)}` between partitions :math:`\mathbf{P_{1, i}}`
+    and :math:`\mathbf{P_{2, j}}` seen as 1D measures.
+
+    Where :
+
+    - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+    - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+    - :math:`\mathbf{F_1}`: Feature matrix in the source space
+    - :math:`\mathbf{F_2}`: Feature matrix in the target space
+    - :math:`\mathbf{M}`: Pairwise similarity matrix between features
+    - :math:`\mathbf{p}`: distribution in the source space
+    - :math:`\mathbf{q}`: distribution in the target space
+    - :math:`L`: quadratic loss function to account for the misfit between the similarity matrices
+
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+    .. note:: All computations in the Gromov-Wasserstein conjugate gradient solver
+        are done with numpy to limit memory overhead.
+
+    Parameters
+    ----------
+    CR1 : array-like, shape (npart1, npart1)
+        Structure matrix between partition representants in the source space.
+    CR2 : array-like, shape (npart2, npart2)
+        Structure matrix between partition representants in the target space.
+    list_R1 : list of npart1 arrays,
+        List of relations between representants and their associated samples in the source space.
+    list_R2 : list of npart2 arrays,
+        List of relations between representants and their associated samples in the target space.
+    list_p1 : list of npart1 arrays,
+        List of node distributions within each partition of the source space.
+    list_p2 : list of npart2 arrays,
+        List of node distributions within each partition of the target space.
+    MR : array-like, shape (npart1, npart2), optional. (Default is None)
+        Metric cost matrix between features of representants across spaces. Only used if `alpha < 1`.
+    alpha: float, optional. Default is 1.
+        FGW trade-off parameter in :math:`]0, 1]` between structure and features.
+        If `alpha = 1` features are ignored hence computing qGW.
+    build_OT: bool, optional. Default is False
+        Either to build or not the OT between non-partitioned structures.
+    log : bool, optional. Default is False
+        record log if True
+    armijo : bool, optional
+        If True the step of the line-search is found via an armijo research. Else closed form is used.
+        If there are convergence issues use False.
+    max_iter : int, optional
+        Max number of iterations
+    tol_rel : float, optional
+        Stop threshold on relative error (>0)
+    tol_abs : float, optional
+        Stop threshold on absolute error (>0)
+    nx : backend, optional
+        POT backend
+
+    **kwargs : dict
+        Additional keyword arguments forwarded to the global (F)GW solver (e.g. ot.optim.cg options).
+
+    Returns
+    -------
+    T_global: array-like, shape (`npart1`, `npart2`)
+        Gromov-Wasserstein alignment :math:`\mathbf{T}^{g}` between representants.
+    Ts_local: dict of local OT matrices.
+        Dictionary with keys :math:`(i, j)` corresponding to 1D OT between
+        :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` if :math:`T^{g}_{ij} \neq 0`.
+    T: array-like, shape `(ns, nt)`
+        Coupling between the two spaces if `build_OT=True` else None.
+    log : dict, if `log=True`.
+        Convergence information and losses of inner OT problems.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if nx is None:
+        arr = [CR1, CR2, *list_R1, *list_R2, *list_p1, *list_p2]
+
+        if MR is not None:
+            arr.append(MR)
+
+        nx = get_backend(*arr)
+
+    npart1 = len(list_R1)
+    npart2 = len(list_R2)
+
+    # compute marginals for global alignment
+    pR1 = nx.from_numpy(list_to_array([nx.sum(p) for p in list_p1]))
+    pR2 = nx.from_numpy(list_to_array([nx.sum(q) for q in list_p2]))
+
+    # compute global alignment
+    if alpha == 1.:
+        res_global = gromov_wasserstein(
+            CR1, CR2, pR1, pR2, loss_fun='square_loss', log=log,
+            armijo=armijo, max_iter=max_iter, tol_rel=tol_rel, tol_abs=tol_abs, **kwargs)
+
+        if log:
+            T_global, dist_global = res_global[0], res_global[1]['gw_dist']
+        else:
+            T_global = res_global
+
+    elif (alpha < 1.) and (alpha > 0.):
+
+        res_global = fused_gromov_wasserstein(
+            MR, CR1, CR2, pR1, pR2, 'square_loss', alpha=alpha, log=log,
+            armijo=armijo, max_iter=max_iter, tol_rel=tol_rel, tol_abs=tol_abs, **kwargs)
+
+        if log:
+            T_global, dist_global = res_global[0], res_global[1]['fgw_dist']
+        else:
+            T_global = res_global
+
+    else:
+        raise ValueError(
+            f"""
+            `alpha='{alpha}'` should be in ]0, 1].
+            """)
+
+    if log:
+        log_ = {}
+        log_['global dist'] = dist_global
+
+    # compute local alignments
+    Ts_local = {}
+    list_p1_norm = [p / nx.sum(p) for p in list_p1]
+    list_p2_norm = [q / nx.sum(q) for q in list_p2]
+
+    for i in range(npart1):
+        for j in range(npart2):
+            if T_global[i, j] != 0.:
+                res_1d = emd_1d(list_R1[i], list_R2[j], list_p1_norm[i], list_p2_norm[j],
+                                metric='sqeuclidean', p=1., log=log)
+                if log:
+                    T_local, log_local = res_1d
+                    Ts_local[(i, j)] = T_local
+                    log_[f'local dist ({i},{j})'] = log_local['cost']
+                else:
+                    Ts_local[(i, j)] = res_1d
+
+    if build_OT:
+        T_rows = []
+        for i in range(npart1):
+            list_Ti = []
+            for j in range(npart2):
+                if T_global[i, j] == 0.:
+                    T_local = nx.zeros((list_R1[i].shape[0], list_R2[j].shape[0]), type_as=T_global)
+                else:
+                    T_local = T_global[i, j] * Ts_local[(i, j)]
+                list_Ti.append(T_local)
+
+            Ti = nx.concatenate(list_Ti, axis=1)
+            T_rows.append(Ti)
+        T = nx.concatenate(T_rows, axis=0)
+
+    else:
+        T = None
+
+    if log:
+        return T_global, Ts_local, T, log_
+
+    else:
+        return T_global, Ts_local, T
+
+
+def get_graph_partition(C, npart, part_method='random', F=None, alpha=1.,
+                        random_state=0, nx=None):
+    r"""
+    Partitioning a given graph with structure matrix :math:`\mathbf{C} \in R^{n \times n}`
+    into `npart` partitions either 'random', or using one of {'louvain', 'fluid'}
+    algorithms from networkx, or 'spectral' clustering from scikit-learn,
+    or (Fused) Gromov-Wasserstein projections from POT.
+
+    Parameters
+    ----------
+    C : array-like, shape (n, n)
+        Structure matrix.
+    npart : int,
+        number of partitions/clusters smaller than the number of nodes in
+        :math:`\mathbf{C}`.
+    part_method : str, optional. Default is 'random'.
+        Partitioning algorithm to use among {'random', 'louvain', 'fluid', 'spectral', 'GW', 'FGW'}.
+        'random' for random sampling of points; 'louvain' and 'fluid' for graph
+        partitioning algorithm that works well on adjacency matrix, If the
+        louvain algorithm is used, `npart` is ignored; 'spectral' for spectral
+        clustering; '(F)GW' for (F)GW projection (not implemented yet, raises a ValueError).
+    F : array-like, shape (n, d), optional. (Default is None)
+        Optional feature matrix aligned with the graph structure. Only used if
+        `part_method="FGW"`.
+    alpha : float, optional. (Default is 1.)
+        Trade-off parameter between feature and structure matrices, taking
+        values in [0, 1] and only used if `F != None` and `part_method="FGW"`.
+    random_state: int, optional
+        Random seed for the partitioning algorithm.
+    nx : backend, optional
+        POT backend.
+
+    Returns
+    -------
+    part : array-like, shape (n,)
+        Array of partition assignment for each node.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if nx is None:
+        nx = get_backend(C)
+
+    n = C.shape[0]
+    C0 = C
+
+    if (alpha != 1.) and (F is None):
+        raise ValueError("`alpha != 1` but node features are not provided.")
+
+    if npart >= n:
+        warnings.warn(
+            "Requested number of partitions higher than the number of nodes "
+            "hence we enforce each node to be a partition.",
+            stacklevel=2
+        )
+
+        part = np.arange(n)
+
+    elif npart == 1:
+        part = np.zeros(n)
+
+    elif part_method == 'random':
+        # randomly partition the space
+        random.seed(random_state)
+        part = list_to_array(random.choices(np.arange(npart), k=C.shape[0]))
+
+    elif part_method == 'louvain':
+        C = nx.to_numpy(C0)
+        graph = from_numpy_array(C)
+        part_sets = louvain_communities(graph, seed=random_state)
+        part = np.zeros(n)
+        for iset_, set_ in enumerate(part_sets):
+            set_ = list(set_)
+            part[set_] = iset_
+
+    elif part_method == 'fluid':
+        C = nx.to_numpy(C0)
+        graph = from_numpy_array(C)
+        part_sets = asyn_fluidc(graph, npart, seed=random_state)
+        part = np.zeros(n)
+        for iset_, set_ in enumerate(part_sets):
+            set_ = list(set_)
+            part[set_] = iset_
+
+    elif part_method == 'spectral':
+        C = nx.to_numpy(C0)
+        sc = SpectralClustering(n_clusters=npart,
+                                random_state=random_state,
+                                affinity='precomputed').fit(C)
+        part = sc.labels_
+
+    elif part_method in ['GW', 'FGW']:
+        raise ValueError(f"`part_method == {part_method}` not implemented yet.")
+
+    else:
+        raise ValueError(
+            f"""
+            Unknown `part_method='{part_method}'`. Use one of:
+            {'random', 'louvain', 'fluid', 'spectral', 'GW', 'FGW'}.
+            """)
+    return nx.from_numpy(part, type_as=C0)
+
+
+def get_graph_representants(C, part, rep_method='pagerank', random_state=0, nx=None):
+    r"""
+    Get representative node for each partition given by :math:`\mathbf{part} \in R^{n}`
+    of a graph with structure matrix :math:`\mathbf{C} \in R^{n \times n}`.
+    Selection is either done randomly or using 'pagerank' algorithm from networkx.
+
+    Parameters
+    ----------
+    C : array-like, shape (n, n)
+        structure matrix.
+    part : array-like, shape (n,)
+        Array of partition assignment for each node.
+    rep_method : str, optional. Default is 'pagerank'.
+        Selection method for representant in each partition. Can be either 'random'
+        i.e random sampling within each partition, or 'pagerank' to select a
+        node with maximal pagerank.
+    random_state: int, optional
+        Random seed used when `rep_method='random'`.
+    nx : backend, optional
+        POT backend
+
+    Returns
+    -------
+    rep_indices : list (or backend array if every node is its own partition), shape (npart,)
+        indices for representative node of each partition sorted
+        according to partition identifiers.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if nx is None:
+        nx = get_backend(C, part)
+
+    rep_indices = []
+    part_ids = nx.unique(part)
+    n_part_ids = part_ids.shape[0]
+    if n_part_ids == C.shape[0]:
+        rep_indices = nx.arange(n_part_ids)
+
+    elif rep_method == 'random':
+        random.seed(random_state)
+        for id_, part_id in enumerate(part_ids):
+            indices = nx.where(part == part_id)[0]
+            rep_indices.append(random.choice(indices))
+
+    elif rep_method == 'pagerank':
+        C0, part0 = C, part
+        C = nx.to_numpy(C0)
+        part = nx.to_numpy(part0)
+        part_ids = np.unique(part)
+
+        for id_ in part_ids:
+            indices = np.where(part == id_)[0]
+            C_id = C[indices, :][:, indices]
+            graph = from_numpy_array(C_id)
+            pagerank_values = list(pagerank(graph).values())
+            rep_idx = np.argmax(pagerank_values)
+            rep_indices.append(indices[rep_idx])
+
+    else:
+        raise ValueError(
+            f"""
+            Unknown `rep_method='{rep_method}'`. Use one of:
+            {'random', 'pagerank'}.
+            """)
+
+    return rep_indices
+
+
+def format_partitioned_graph(C, p, part, rep_indices, F=None, M=None,
+                             alpha=1., nx=None):
+    r"""
+    Format an attributed graph :math:`(\mathbf{C}, \mathbf{F}, \mathbf{p})`
+    with structure matrix :math:`\mathbf{C} \in R^{n \times n}`, feature matrix
+    :math:`\mathbf{F} \in R^{n \times d}` and node relative importance
+    :math:`\mathbf{p} \in \Sigma_n`, into a partitioned attributed graph
+    taking into account partitions and representants :math:`\mathcal{P} = \{(\mathbf{P_{i}}, \mathbf{r_{i}})\}_i`.
+
+    Parameters
+    ----------
+    C : array-like, shape (n, n)
+        Structure matrix.
+    p : array-like, shape (n,),
+        Node distribution.
+    part : array-like, shape (n,)
+        Array of partition assignment for each node.
+    rep_indices : list or array-like of ints, shape (npart,)
+        indices for representative node of each partition sorted according to
+        partition identifiers.
+    F : array-like, shape (n, d), optional. (Default is None)
+        Optional feature matrix aligned with the graph structure.
+    M : array-like, shape (n, n), optional. (Default is None)
+        Optional pairwise similarity matrix between features. Required (with `F`) if `alpha != 1`.
+    alpha: float, optional. Default is 1.
+        Trade-off parameter in :math:`]0, 1]` between structure and features.
+        If `alpha = 1` features are ignored. This trade-off is taken into account
+        into the outputted relations between nodes and representants.
+    nx : backend, optional
+        POT backend
+
+    Returns
+    -------
+    CR : array-like, shape (npart, npart)
+        Structure matrix between partition representants.
+    list_R : list of npart arrays,
+        List of relations between a representant and nodes in its partition,
+        for each partition.
+    list_p : list of npart arrays,
+        List of node distributions within each partition.
+    FR : array-like, shape (npart, d), if `F != None`.
+        Feature matrix of representants.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if nx is None:
+        arr = [C, p, part]
+        if F is not None:
+            arr.append(F)
+        if M is not None:
+            arr.append(M)
+
+        nx = get_backend(*arr)
+
+    if alpha != 1.:
+        if (M is None) or (F is None):
+            raise ValueError(
+                f"""
+                `alpha == {alpha} != 1` but features information is not properly provided.
+                """)
+
+    CR = C[rep_indices, :][:, rep_indices]
+
+    if alpha != 1.:
+        C_new = alpha * C + (1 - alpha) * M
+    else:
+        C_new = C
+
+    list_R, list_p = [], []
+
+    part_ids = nx.unique(part)
+
+    for id_, part_id in enumerate(part_ids):
+        indices = nx.where(part == part_id)[0]
+        list_R.append(C_new[rep_indices[id_], indices])
+        list_p.append(p[indices])
+
+    if F is None:
+
+        return CR, list_R, list_p
+    else:
+        FR = F[rep_indices, :]
+
+        return CR, list_R, list_p, FR
+
+
+def quantized_fused_gromov_wasserstein(
+        C1, C2, npart1, npart2, p=None, q=None, C1_aux=None, C2_aux=None,
+        F1=None, F2=None, alpha=1., part_method='fluid',
+        rep_method='random', log=False, armijo=False, max_iter=1e4,
+        tol_rel=1e-9, tol_abs=1e-9, random_state=0, **kwargs):
+    r"""
+    Returns the quantized Fused Gromov-Wasserstein transport between
+    :math:`(\mathbf{C_1}, \mathbf{F_1}, \mathbf{p})` and :math:`(\mathbf{C_2},
+    \mathbf{F_2}, \mathbf{q})`, whose samples are assigned to partitions and
+    representants :math:`\mathcal{P_1} = \{(\mathbf{P_{1, i}}, \mathbf{r_{1, i}})\}_{i \leq npart1}`
+    and :math:`\mathcal{P_2} = \{(\mathbf{P_{2, j}}, \mathbf{r_{2, j}})\}_{j \leq npart2}`.
+
+    The function estimates the following optimization problem:
+
+    .. math::
+        \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \alpha \sum_{i,j,k,l}
+        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        + (1-\alpha) \langle \mathbf{T}, \mathbf{D}(\mathbf{F_1}, \mathbf{F}_2) \rangle_F
+        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+             \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+             \mathbf{T} &\geq 0
+
+             \mathbf{T}_{|\mathbf{P_{1, i}}, \mathbf{P_{2, j}}} &= T^{g}_{ij} \mathbf{T}^{(i,j)}
+
+    using a two-step strategy computing: i) a global alignment :math:`\mathbf{T}^{g}`
+    between representants across joint structure and feature spaces;
+    ii) local alignments :math:`\mathbf{T}^{(i, j)}` between partitions
+    :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` seen as 1D measures.
+
+    Where :
+
+    - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+    - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+    - :math:`\mathbf{F_1}`: Feature matrix in the source space
+    - :math:`\mathbf{F_2}`: Feature matrix in the target space
+    - :math:`\mathbf{D}(\mathbf{F_1}, \mathbf{F_2})`: Pairwise euclidean distance matrix between features
+    - :math:`\mathbf{p}`: distribution in the source space
+    - :math:`\mathbf{q}`: distribution in the target space
+    - :math:`L`: quadratic loss function to account for the misfit between the similarity matrices
+
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+    .. note:: All computations in the conjugate gradient solver are done with
+        numpy to limit memory overhead.
+
+    Parameters
+    ----------
+    C1 : array-like, shape (ns, ns)
+        Structure matrix in the source space.
+    C2 : array-like, shape (nt, nt)
+        Structure matrix in the target space.
+    npart1 : int,
+        number of partition in the source space.
+    npart2 : int,
+        number of partition in the target space.
+    p : array-like, shape (ns,), optional
+        Distribution in the source space.
+        If let to its default value None, uniform distribution is taken.
+    q : array-like, shape (nt,), optional
+        Distribution in the target space.
+        If let to its default value None, uniform distribution is taken.
+    C1_aux : array-like, shape (ns, ns), optional. Default is None.
+        Auxiliary structure matrix in the source space to perform the partitioning
+        and representant selection.
+    C2_aux : array-like, shape (nt, nt), optional. Default is None.
+        Auxiliary structure matrix in the target space to perform the partitioning
+        and representant selection.
+    F1 : array-like, shape (ns, d), optional. Default is None.
+        Feature matrix in the source space.
+    F2 : array-like, shape (nt, d), optional. Default is None.
+        Feature matrix in the target space
+    alpha: float, optional. Default is 1.
+        FGW trade-off parameter in :math:`]0, 1]` between structure and features.
+        If `alpha = 1` features are ignored hence computing qGW. Values outside
+        :math:`]0, 1]` (including `alpha=0`) are rejected by the partitioned solver.
+    part_method : str, optional. Default is 'fluid'.
+        Partitioning algorithm to use among {'random', 'louvain', 'fluid',
+        'spectral', 'louvain_fused', 'fluid_fused', 'spectral_fused', 'GW', 'FGW'}.
+        If part_method in {'louvain_fused', 'fluid_fused', 'spectral_fused'},
+        corresponding graph partitioning algorithm {'louvain', 'fluid', 'spectral'}
+        will be used on the modified structure matrix
+        :math:`\alpha \mathbf{C} + (1 - \alpha) \mathbf{D}(\mathbf{F})` where
+        :math:`\mathbf{D}(\mathbf{F})` is the pairwise euclidean matrix between features.
+        If part_method in {'GW', 'FGW'}, a (F)GW projection is used.
+        If the louvain algorithm is used, the requested number of partitions is
+        ignored.
+    rep_method : str, optional. Default is 'random'.
+        Selection method for node representant in each partition.
+        Can be either 'random' i.e random sampling within each partition,
+        {'pagerank', 'pagerank_fused'} to select a node with maximal pagerank w.r.t
+        :math:`\mathbf{C}` or :math:`\alpha \mathbf{C} + (1 - \alpha) \mathbf{D}(\mathbf{F})`.
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+    armijo : bool, optional
+        If True the step of the line-search is found via an armijo research. Else closed form is used.
+        If there are convergence issues use False.
+    max_iter : int, optional
+        Max number of iterations
+    tol_rel : float, optional
+        Stop threshold on relative error (>0)
+    tol_abs : float, optional
+        Stop threshold on absolute error (>0)
+    random_state: int, optional
+        Random seed for the partitioning algorithm
+    **kwargs : dict
+        parameters can be directly passed to the ot.optim.cg solver
+
+    Returns
+    -------
+    T_global: array-like, shape (`npart1`, `npart2`)
+        Fused Gromov-Wasserstein alignment :math:`\mathbf{T}^{g}` between representants.
+    Ts_local: dict of local OT matrices.
+        Dictionary with keys :math:`(i, j)` corresponding to 1D OT between
+        :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` if :math:`T^{g}_{ij} \neq 0`.
+    T: array-like, shape `(ns, nt)`
+        Coupling between the two spaces.
+    log : dict, if `log=True`.
+        Convergence information for inner problems and qGW loss.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if (part_method in ['fluid', 'louvain', 'fluid_fused', 'louvain_fused'] or (rep_method in ['pagerank', 'pagerank_fused'])):
+        if not networkx_import:
+            warnings.warn(
+                f"""
+                Networkx is not installed, so part_method={part_method} and/or
+                rep_method={rep_method} cannot be used and are set to `random`
+                default methods. Consider installing Networkx to fix this.
+                """
+            )
+            part_method = 'random'
+            rep_method = 'random'
+
+    if (part_method in ['spectral', 'spectral_fused']) and (not sklearn_import):
+        warnings.warn(
+            f"""
+            Scikit-learn is not installed, so part_method={part_method} and/or
+            rep_method={rep_method} cannot be used and are set to `random`
+            default methods. Consider installing Scikit-learn to fix this.
+            """
+        )
+        part_method = 'random'
+        rep_method = 'random'
+
+    if (('fused' in part_method) or ('fused' in rep_method) or (part_method == 'FGW')):
+        if (F1 is None) or (F2 is None):
+            raise ValueError(
+                f"""
+                `part_method='{part_method}'` and/or `rep_method='{rep_method}'`
+                require feature matrices which are not provided as inputs.
+                """)
+
+    arr = [C1, C2]
+    if C1_aux is not None:
+        arr.append(C1_aux)
+    else:
+        C1_aux = C1
+    if C2_aux is not None:
+        arr.append(C2_aux)
+    else:
+        C2_aux = C2
+    if p is not None:
+        arr.append(list_to_array(p))
+    else:
+        p = unif(C1.shape[0], type_as=C1)
+    if q is not None:
+        arr.append(list_to_array(q))
+    else:
+        q = unif(C2.shape[0], type_as=C2)
+    if F1 is not None:
+        arr.append(F1)
+    if F2 is not None:
+        arr.append(F2)
+
+    nx = get_backend(*arr)
+
+    DF1 = None
+    DF2 = None
+    # compute attributed graph partitions potentially using the auxiliary structure
+    if 'fused' in part_method:
+
+        DF1 = dist(F1, F1)
+        DF2 = dist(F2, F2)
+        C1_new = alpha * C1_aux + (1 - alpha) * DF1
+        C2_new = alpha * C2_aux + (1 - alpha) * DF2
+
+        part_method_ = part_method[:-6]
+        part1 = get_graph_partition(C1_new, npart1, part_method_, random_state=random_state, nx=nx)
+        part2 = get_graph_partition(C2_new, npart2, part_method_, random_state=random_state, nx=nx)
+
+    else:
+        part1 = get_graph_partition(C1_aux, npart1, part_method, F1, alpha, random_state, nx)
+        part2 = get_graph_partition(C2_aux, npart2, part_method, F2, alpha, random_state, nx)
+
+    if 'fused' in rep_method:
+        if DF1 is None:
+            DF1 = dist(F1, F1)
+            DF2 = dist(F2, F2)
+            C1_new = alpha * C1_aux + (1 - alpha) * DF1
+            C2_new = alpha * C2_aux + (1 - alpha) * DF2
+
+        rep_method_ = rep_method[:-6]
+
+        rep_indices1 = get_graph_representants(C1_new, part1, rep_method_, random_state, nx)
+        rep_indices2 = get_graph_representants(C2_new, part2, rep_method_, random_state, nx)
+
+    else:
+        rep_indices1 = get_graph_representants(C1_aux, part1, rep_method, random_state, nx)
+        rep_indices2 = get_graph_representants(C2_aux, part2, rep_method, random_state, nx)
+
+    # format partitions over (C1, F1) and (C2, F2)
+    if (F1 is None) and (F2 is None):
+        CR1, list_R1, list_p1 = format_partitioned_graph(C1, p, part1, rep_indices1, nx=nx)
+        CR2, list_R2, list_p2 = format_partitioned_graph(C2, q, part2, rep_indices2, nx=nx)
+
+        MR = None
+    else:
+        if DF1 is None:
+            DF1 = dist(F1, F1)
+            DF2 = dist(F2, F2)
+
+        CR1, list_R1, list_p1, FR1 = format_partitioned_graph(C1, p, part1, rep_indices1, F1, DF1, alpha, nx)
+        CR2, list_R2, list_p2, FR2 = format_partitioned_graph(C2, q, part2, rep_indices2, F2, DF2, alpha, nx)
+
+        MR = dist(FR1, FR2)
+    # call to partitioned quantized fused gromov-wasserstein solver
+
+    res = quantized_fused_gromov_wasserstein_partitioned(
+        CR1, CR2, list_R1, list_R2, list_p1, list_p2, MR, alpha, build_OT=True,
+        log=log, armijo=armijo, max_iter=max_iter, tol_rel=tol_rel,
+        tol_abs=tol_abs, nx=nx, **kwargs)
+
+    if log:
+        T_global, Ts_local, T, log_ = res
+
+        # compute the transport cost on structures
+        constC, hC1, hC2 = init_matrix(C1, C2, p, q, 'square_loss', nx)
+        structure_cost = gwloss(constC, hC1, hC2, T, nx)
+
+        if alpha != 1.:
+            M = dist(F1, F2)
+            feature_cost = nx.sum(M * T)
+        else:
+            feature_cost = 0.
+
+        log_['qFGW_dist'] = alpha * structure_cost + (1 - alpha) * feature_cost
+        return T_global, Ts_local, T, log_
+
+    else:
+        T_global, Ts_local, T = res
+
+        return T_global, Ts_local, T
+
+
+def get_partition_and_representants_samples(
+        X, npart, method='kmeans', random_state=0, nx=None):
+    r"""
+    Compute `npart` partitions and representants over samples :math:`\mathbf{X} \in R^{n \times d}`
+    using either a random or a kmeans algorithm.
+
+    Parameters
+    ----------
+    X : array-like, shape (n, d)
+        Samples endowed with an euclidean geometry.
+    npart : int,
+        number of partitions smaller than the number of samples in
+        :math:`\mathbf{X}`.
+    method : str, optional. Default is 'kmeans'.
+        Partitioning and representant selection algorithms to use among
+        {'random', 'kmeans'}. 'random' for random sampling of points; 'kmeans'
+        for k-means clustering using scikit-learn implementation where closest
+        points to centroids are considered as representants.
+    random_state: int, optional
+        Random seed for the partitioning algorithm.
+    nx : backend, optional
+        POT backend.
+
+    Returns
+    -------
+    part : array-like, shape (n,)
+        Array of partition assignment for each node.
+
+    rep_indices : list (or backend array if `npart >= n`), shape (npart,)
+        indices for representative node of each partition sorted
+        according to partition identifiers.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if nx is None:
+        nx = get_backend(X)
+
+    n = X.shape[0]
+    X0 = X
+
+    if npart >= n:
+        warnings.warn(
+            "Requested number of partitions higher than the number of nodes "
+            "hence we enforce each node to be a partition.",
+            stacklevel=2
+        )
+
+        part = nx.arange(n)
+        rep_indices = nx.arange(n)
+
+    elif npart == 1:
+        random.seed(random_state)
+        part = nx.zeros(n)
+        rep_indices = [random.choice(nx.arange(n))]
+
+    elif method == 'random':
+        # randomly partition the space
+        random.seed(random_state)
+        part = list_to_array(random.choices(np.arange(npart), k=X.shape[0]))
+        part = nx.from_numpy(part, type_as=X0)
+
+        # randomly select representant in each partition
+        rep_indices = []
+        part_ids = nx.unique(part)
+        for part_id in part_ids:
+            indices = nx.where(part == part_id)[0]
+            rep_indices.append(random.choice(indices))
+
+    elif method == 'kmeans':
+        X = nx.to_numpy(X0)
+        km = KMeans(n_clusters=npart, random_state=random_state).fit(X)
+        part = nx.from_numpy(km.labels_, type_as=X0)
+
+        rep_indices = []
+        for part_id in range(npart):
+            indices = nx.where(part == part_id)[0]
+            dists = dist(X[indices], km.cluster_centers_[part_id][None, :])
+            best_idx = indices[dists.argmin()]
+            rep_indices.append(best_idx)
+
+    else:
+        raise ValueError(
+            f"""
+            Unknown `method='{method}'`. Use one of: {'random', 'kmeans'}
+            """)
+
+    return part, rep_indices
+
+
+def format_partitioned_samples(
+        X, p, part, rep_indices, F=None, alpha=1., nx=None):
+    r"""
+    Format an attributed graph :math:`(\mathbf{D}(\mathbf{X}), \mathbf{F}, \mathbf{p})`
+    with euclidean structure matrix :math:`\mathbf{D}(\mathbf{X}) \in R^{n \times n}`,
+    feature matrix :math:`\mathbf{F} \in R^{n \times d_f}` and node relative importance
+    :math:`\mathbf{p} \in \Sigma_n`, into a partitioned attributed graph
+    taking into account partitions and representants :math:`\mathcal{P} = \left\{(\mathbf{P_{i}}, \mathbf{r_{i}})\right\}_i`.
+
+    Parameters
+    ----------
+    X : array-like, shape (n, d)
+        Samples whose pairwise distances define the structure matrix.
+    p : array-like, shape (n,),
+        Node distribution.
+    part : array-like, shape (n,)
+        Array of partition assignment for each node.
+    rep_indices : list of array-like of ints, shape (npart,)
+        indices for representative node of each partition sorted according to
+        partition identifiers.
+    F : array-like, shape (n, d_f), optional. (Default is None)
+        Optional feature matrix aligned with the samples.
+    alpha: float, optional. Default is 1.
+        Trade-off parameter in :math:`]0, 1]` between structure and features.
+        If `alpha = 1` features are ignored, otherwise `F` is required and a
+        ValueError is raised when it is None. The trade-off weights `list_R`.
+    nx : backend, optional
+        POT backend
+
+    Returns
+    -------
+    CR : array-like, shape (npart, npart)
+        Structure matrix between partition representants.
+    list_R : list of npart arrays,
+        List of relations between a representant and nodes in its partition,
+        for each partition.
+    list_p : list of npart arrays,
+        List of node distributions within each partition.
+    FR : array-like, shape (npart, d_f), only returned if `F` is not None.
+        Feature matrix of representants.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+    if nx is None:
+        arr = [X, p, part]
+        if F is not None:
+            arr.append(F)
+
+        nx = get_backend(*arr)
+
+    if alpha != 1.:
+        if F is None:
+            raise ValueError(
+                f"""
+                `alpha == {alpha} != 1` but features information is not properly provided.
+                """)
+
+    XR = X[rep_indices, :]
+    CR = dist(XR, XR)
+
+    list_R, list_p = [], []
+
+    part_ids = nx.unique(part)
+    # the id_-th partition identifier is paired with the id_-th representant
+    for id_, part_id in enumerate(part_ids):
+        indices = nx.where(part == part_id)[0]
+        structure_R = dist(X[indices], X[rep_indices[id_]][None, :])
+
+        if alpha != 1.:
+            features_R = dist(F[indices], F[rep_indices[id_]][None, :])
+        else:
+            features_R = 0.
+
+        list_R.append(alpha * structure_R + (1 - alpha) * features_R)
+        list_p.append(p[indices])
+
+    if F is None:
+
+        return CR, list_R, list_p
+    else:
+        FR = F[rep_indices, :]
+
+        return CR, list_R, list_p, FR
+
+
+def quantized_fused_gromov_wasserstein_samples(
+        X1, X2, npart1, npart2, p=None, q=None, F1=None, F2=None, alpha=1.,
+        method='kmeans', log=False, armijo=False, max_iter=1e4,
+        tol_rel=1e-9, tol_abs=1e-9, random_state=0, **kwargs):
+    r"""
+    Returns the quantized Fused Gromov-Wasserstein transport between samples
+    endowed with their respective euclidean geometry :math:`(\mathbf{D}(\mathbf{X_1}), \mathbf{F_1}, \mathbf{p})`
+    and :math:`(\mathbf{D}(\mathbf{X_2}), \mathbf{F_2}, \mathbf{q})`, whose samples are assigned to partitions and
+    representants :math:`\mathcal{P_1} = \{(\mathbf{P_{1, i}}, \mathbf{r_{1, i}})\}_{i \leq npart1}`
+    and :math:`\mathcal{P_2} = \{(\mathbf{P_{2, j}}, \mathbf{r_{2, j}})\}_{j \leq npart2}`.
+
+    The function estimates the following optimization problem:
+
+    .. math::
+        \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \alpha \sum_{i,j,k,l}
+        L(\mathbf{D}(\mathbf{X_1})_{i,k}, \mathbf{D}(\mathbf{X_2})_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        + (1-\alpha) \langle \mathbf{T}, \mathbf{D}(\mathbf{F_1}, \mathbf{F}_2) \rangle_F
+        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+             \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+             \mathbf{T} &\geq 0
+
+             \mathbf{T}_{|\mathbf{P_{1, i}}, \mathbf{P_{2, j}}} &= T^{g}_{ij} \mathbf{T}^{(i,j)}
+
+    using a two-step strategy computing: i) a global alignment :math:`\mathbf{T}^{g}`
+    between representants across joint structure and feature spaces;
+    ii) local alignments :math:`\mathbf{T}^{(i, j)}` between partitions
+    :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` seen as 1D measures.
+
+    Where :
+
+    - :math:`\mathbf{X_1}`: Samples in the source space
+    - :math:`\mathbf{X_2}`: Samples in the target space
+    - :math:`\mathbf{F_1}`: Feature matrix in the source space
+    - :math:`\mathbf{F_2}`: Feature matrix in the target space
+    - :math:`\mathbf{D}(\mathbf{F_1}, \mathbf{F_2})`: Pairwise euclidean distance matrix between features
+    - :math:`\mathbf{p}`: distribution in the source space
+    - :math:`\mathbf{q}`: distribution in the target space
+    - :math:`L`: quadratic loss function to account for the misfit between the similarity matrices
+
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+    .. note:: All computations in the conjugate gradient solver are done with
+        numpy to limit memory overhead.
+
+    Parameters
+    ----------
+    X1 : array-like, shape (ns, ds)
+        Samples in the source space.
+    X2 : array-like, shape (nt, dt)
+        Samples in the target space.
+    npart1 : int,
+        number of partition in the source space.
+    npart2 : int,
+        number of partition in the target space.
+    p : array-like, shape (ns,), optional
+        Distribution in the source space.
+        If let to its default value None, uniform distribution is taken.
+    q : array-like, shape (nt,), optional
+        Distribution in the target space.
+        If let to its default value None, uniform distribution is taken.
+    F1 : array-like, shape (ns, d), optional. Default is None.
+        Feature matrix in the source space.
+    F2 : array-like, shape (nt, d), optional. Default is None.
+        Feature matrix in the target space
+    alpha: float, optional. Default is 1.
+        FGW trade-off parameter in :math:`]0, 1]` between structure and features.
+        If `alpha = 1` features are ignored hence computing qGW, if `alpha=0`
+        structures are ignored and we compute the quantized Wasserstein transport.
+    method : str, optional. Default is 'kmeans'.
+        Partitioning and representant selection algorithms to use among
+        {'random', 'kmeans', 'kmeans_fused'}.
+        If `method == 'kmeans_fused'`, kmeans is performed on augmented
+        samples :math:`[\alpha \mathbf{X}; (1 - \alpha) \mathbf{F}]`.
+    verbose : bool, optional
+        Not an explicit argument here: if given, it is forwarded with `**kwargs`.
+    log : bool, optional
+        record log if True
+    armijo : bool, optional
+        If True the step of the line-search is found via an armijo research. Else closed form is used.
+        If there are convergence issues use False.
+    max_iter : int, optional
+        Max number of iterations
+    tol_rel : float, optional
+        Stop threshold on relative error (>0)
+    tol_abs : float, optional
+        Stop threshold on absolute error (>0)
+    random_state: int, optional
+        Random seed for the partitioning algorithm
+    **kwargs : dict
+        parameters can be directly passed to the ot.optim.cg solver
+
+    Returns
+    -------
+    T_global: array-like, shape (`npart1`, `npart2`)
+        Fused Gromov-Wasserstein alignment :math:`\mathbf{T}^{g}` between representants.
+    Ts_local: dict of local OT matrices.
+        Dictionary with keys :math:`(i, j)` corresponding to 1D OT between
+        :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` if :math:`T^{g}_{ij} \neq 0`.
+    T: array-like, shape `(ns, nt)`
+        Coupling between the two spaces.
+    log : dict
+        Convergence information for inner problems and qGW loss.
+
+    References
+    ----------
+    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
+        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.
+
+    """
+
+    if (method in ['kmeans', 'kmeans_fused']) and (not sklearn_import):
+        warnings.warn(
+            f"""
+            Scikit-learn is not installed, so method={method} cannot be used
+            and is set to `random` default methods. Consider installing
+            Scikit-learn to fix this.
+            """
+        )
+        method = 'random'
+
+    if ('fused' in method) and ((F1 is None) or (F2 is None)):
+        raise ValueError(
+            f"""
+            `method='{method}'` requires feature matrices which are not provided as inputs.
+            """)
+
+    arr = [X1, X2]
+    if p is not None:
+        arr.append(list_to_array(p))
+    else:
+        p = unif(X1.shape[0], type_as=X1)
+    if q is not None:
+        arr.append(list_to_array(q))
+    else:
+        q = unif(X2.shape[0], type_as=X2)
+    if F1 is not None:
+        arr.append(F1)
+    if F2 is not None:
+        arr.append(F2)
+
+    nx = get_backend(*arr)
+
+    # compute attributed partitions and representants; the partitioner only
+    # knows base methods, so the '_fused' suffix is stripped even if alpha == 1
+    method_ = method[:-6] if method.endswith('_fused') else method
+    if ('fused' in method) and (alpha != 1.):
+        X1_new = nx.concatenate([alpha * X1, (1 - alpha) * F1], axis=1)
+        X2_new = nx.concatenate([alpha * X2, (1 - alpha) * F2], axis=1)
+    else:
+        X1_new, X2_new = X1, X2
+    part1, rep_indices1 = get_partition_and_representants_samples(
+        X1_new, npart1, method_, random_state, nx)
+    part2, rep_indices2 = get_partition_and_representants_samples(
+        X2_new, npart2, method_, random_state, nx)
+    # format partitions over (X1, F1) and (X2, F2)
+
+    if (F1 is None) and (F2 is None):
+        CR1, list_R1, list_p1 = format_partitioned_samples(
+            X1, p, part1, rep_indices1, nx=nx)
+        CR2, list_R2, list_p2 = format_partitioned_samples(
+            X2, q, part2, rep_indices2, nx=nx)
+
+        MR = None
+    else:
+        CR1, list_R1, list_p1, FR1 = format_partitioned_samples(
+            X1, p, part1, rep_indices1, F1, alpha, nx)
+        CR2, list_R2, list_p2, FR2 = format_partitioned_samples(
+            X2, q, part2, rep_indices2, F2, alpha, nx)
+
+        MR = dist(FR1, FR2)
+
+    # call to partitioned quantized fused gromov-wasserstein solver
+
+    res = quantized_fused_gromov_wasserstein_partitioned(
+        CR1, CR2, list_R1, list_R2, list_p1, list_p2, MR, alpha, build_OT=True,
+        log=log, armijo=armijo, max_iter=max_iter, tol_rel=tol_rel,
+        tol_abs=tol_abs, nx=nx, **kwargs)
+
+    if log:
+        T_global, Ts_local, T, log_ = res
+
+        C1 = dist(X1, X1)
+        C2 = dist(X2, X2)
+
+        # compute the transport cost on structures
+        constC, hC1, hC2 = init_matrix(C1, C2, p, q, 'square_loss', nx)
+        structure_cost = gwloss(constC, hC1, hC2, T, nx)
+
+        if alpha != 1.:
+            M = dist(F1, F2)
+            feature_cost = nx.sum(M * T)
+        else:
+            feature_cost = 0.
+
+        log_['qFGW_dist'] = alpha * structure_cost + (1 - alpha) * feature_cost
+        return T_global, Ts_local, T, log_
+
+    else:
+        T_global, Ts_local, T = res
+
+        return T_global, Ts_local, T
diff --git a/test/gromov/test_quantized.py b/test/gromov/test_quantized.py
new file mode 100644
index 000000000..a864a8a46
--- /dev/null
+++ b/test/gromov/test_quantized.py
@@ -0,0 +1,377 @@
+"""Tests for gromov._quantized.py """
+
+# Author: Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+import pytest
+
+import ot
+
+from ot.gromov._quantized import (
+    networkx_import, sklearn_import)
+
+
+def test_quantized_gw(nx):
+    n_samples = 30  # number of nodes per graph
+
+    rng = np.random.RandomState(0)
+    C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples))
+    C1 = (C1 + C1.T) / 2.
+
+    C2 = rng.uniform(low=10., high=20., size=(n_samples, n_samples))
+    C2 = (C2 + C2.T) / 2.
+
+    p = ot.unif(n_samples)
+    q = ot.unif(n_samples)
+
+    npart2 = 3
+
+    C1b, C2b, pb, qb = nx.from_numpy(C1, C2, p, q)
+    # npart1 also covers a single partition and more partitions than n_samples
+    for npart1 in [1, n_samples + 1, 2]:
+        log_tests = [True, False, False, True, True, False]
+
+        pairs_part_rep = [('random', 'random')]
+        if networkx_import:
+            pairs_part_rep += [('louvain', 'random'), ('fluid', 'pagerank')]
+        if sklearn_import:
+            pairs_part_rep += [('spectral', 'random')]
+
+        count_mode = 0
+
+        for part_method, rep_method in pairs_part_rep:
+            log_ = log_tests[count_mode]
+            count_mode += 1
+
+            res = ot.gromov.quantized_fused_gromov_wasserstein(
+                C1, C2, npart1, npart2, p, None, C1, None, part_method=part_method,
+                rep_method=rep_method, log=log_)
+
+            resb = ot.gromov.quantized_fused_gromov_wasserstein(
+                C1b, C2b, npart1, npart2, None, qb, None, C2b, part_method=part_method,
+                rep_method=rep_method, log=log_)
+
+            if log_:
+                T_global, Ts_local, T, log = res
+                T_globalb, Ts_localb, Tb, logb = resb
+            else:
+                T_global, Ts_local, T = res
+                T_globalb, Ts_localb, Tb = resb
+
+            Tb = nx.to_numpy(Tb)
+            # numpy and backend plans must agree and have marginals (p, q)
+            np.testing.assert_allclose(T, Tb, atol=1e-06)
+            np.testing.assert_allclose(
+                p, Tb.sum(1), atol=1e-06)  # cf convergence gromov
+            np.testing.assert_allclose(
+                q, Tb.sum(0), atol=1e-06)  # cf convergence gromov
+
+            if log_:
+                for key in log.keys():
+                    # Float errors differ across backends, so the inner test
+                    # T_global[i, j] != 0. may select different partition pairs
+                    # for the 1D OT: only keys found in both logs are compared
+                    if key in logb.keys():
+                        np.testing.assert_allclose(log[key], logb[key], atol=1e-06)
+
+
+def test_quantized_fgw(nx):
+    n_samples = 30  # number of nodes per graph
+
+    rng = np.random.RandomState(0)
+    C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples))
+    C1 = (C1 + C1.T) / 2.
+
+    F1 = rng.uniform(low=0., high=10, size=(n_samples, 1))
+
+    C2 = rng.uniform(low=10., high=20., size=(n_samples, n_samples))
+    C2 = (C2 + C2.T) / 2.
+
+    F2 = rng.uniform(low=0., high=10, size=(n_samples, 1))
+
+    p = ot.unif(n_samples)
+    q = ot.unif(n_samples)
+
+    npart1 = 2
+    npart2 = 3
+
+    C1b, C2b, F1b, F2b, pb, qb = nx.from_numpy(C1, C2, F1, F2, p, q)
+
+    log_tests = [True, False, False, True, True, False]
+
+    pairs_part_rep = []
+    if networkx_import:
+        pairs_part_rep += [('louvain_fused', 'pagerank'),
+                           ('louvain', 'pagerank_fused'),
+                           ('fluid_fused', 'pagerank_fused')]
+    if sklearn_import:
+        pairs_part_rep += [('spectral_fused', 'random')]
+    # ('random', 'random') comes last: its results are reused after the loop
+    pairs_part_rep += [('random', 'random')]
+    count_mode = 0
+
+    alpha = 0.5
+
+    for part_method, rep_method in pairs_part_rep:
+        log_ = log_tests[count_mode]
+        count_mode += 1
+
+        res = ot.gromov.quantized_fused_gromov_wasserstein(
+            C1, C2, npart1, npart2, p, None, C1, None, F1, F2, alpha,
+            part_method, rep_method, log_)
+
+        resb = ot.gromov.quantized_fused_gromov_wasserstein(
+            C1b, C2b, npart1, npart2, None, qb, None, C2b, F1b, F2b, alpha,
+            part_method, rep_method, log_)
+
+        if log_:
+            T_global, Ts_local, T, log = res
+            T_globalb, Ts_localb, Tb, logb = resb
+        else:
+            T_global, Ts_local, T = res
+            T_globalb, Ts_localb, Tb = resb
+
+        Tb = nx.to_numpy(Tb)
+        # numpy and backend plans must agree and have marginals (p, q)
+        np.testing.assert_allclose(T, Tb, atol=1e-06)
+        np.testing.assert_allclose(
+            p, Tb.sum(1), atol=1e-06)  # cf convergence gromov
+        np.testing.assert_allclose(
+            q, Tb.sum(0), atol=1e-06)  # cf convergence gromov
+
+        if log_:
+            for key in log.keys():
+                # Float errors differ across backends, so the inner test
+                # T_global[i, j] != 0. may select different partition pairs
+                # for the 1D OT: only keys found in both logs are compared
+                if key in logb.keys():
+                    np.testing.assert_allclose(log[key], logb[key], atol=1e-06)
+
+    # complementary tests for utils functions on the last ('random', 'random') pair
+    DF1b = ot.dist(F1b, F1b)
+    DF2b = ot.dist(F2b, F2b)
+    C1b_new = alpha * C1b + (1 - alpha) * DF1b
+    C2b_new = alpha * C2b + (1 - alpha) * DF2b
+
+    part1b = ot.gromov.get_graph_partition(
+        C1b_new, npart1, part_method=pairs_part_rep[-1][0], random_state=0)
+    part2b = ot.gromov._quantized.get_graph_partition(
+        C2b_new, npart2, part_method=pairs_part_rep[-1][0], random_state=0)
+
+    rep_indices1b = ot.gromov.get_graph_representants(
+        C1b, part1b, rep_method=pairs_part_rep[-1][1], random_state=0)
+    rep_indices2b = ot.gromov.get_graph_representants(
+        C2b, part2b, rep_method=pairs_part_rep[-1][1], random_state=0)
+
+    CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_graph(
+        C1b, pb, part1b, rep_indices1b, F1b, DF1b, alpha)
+    CR2b, list_R2b, list_p2b, FR2b = ot.gromov.format_partitioned_graph(
+        C2b, qb, part2b, rep_indices2b, F2b, DF2b, alpha)
+
+    MRb = ot.dist(FR1b, FR2b)
+
+    T_globalb, Ts_localb, _ = ot.gromov.quantized_fused_gromov_wasserstein_partitioned(
+        CR1b, CR2b, list_R1b, list_R2b, list_p1b, list_p2b, MRb, alpha, build_OT=False)
+
+    T_globalb = nx.to_numpy(T_globalb)
+    np.testing.assert_allclose(T_global, T_globalb, atol=1e-06)
+
+    for key in Ts_localb.keys():
+        T_localb = nx.to_numpy(Ts_localb[key])
+        np.testing.assert_allclose(Ts_local[key], T_localb, atol=1e-06)
+
+    # each of these part_method values must be rejected with a ValueError
+    for method in ['unknown_method', 'GW', 'FGW']:
+        with pytest.raises(ValueError):
+            ot.gromov.get_graph_partition(
+                C1b, npart1, part_method=method, random_state=0)
+    # NOTE: `method` still holds the last loop value ('FGW') in the call below
+    with pytest.raises(ValueError):
+        ot.gromov.get_graph_partition(
+            C1b, npart1, part_method=method, alpha=0.5, F=None, random_state=0)
+
+    # tests for edge cases of the representant selection
+    with pytest.raises(ValueError):
+        ot.gromov.get_graph_representants(
+            C1b, part1b, rep_method='unknown_method', random_state=0)
+
+    # format_partitioned_graph must raise when None is passed in place of DF1b
+    with pytest.raises(ValueError):
+        CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_graph(
+            C1b, pb, part1b, rep_indices1b, F1b, None, alpha)
+
+    # Tests in qFGW solvers
+    # for non admissible values of alpha
+    with pytest.raises(ValueError):
+        ot.gromov.quantized_fused_gromov_wasserstein_partitioned(
+            CR1b, CR2b, list_R1b, list_R2b, list_p1b, list_p2b, MRb, 0, build_OT=False)
+
+    # for non-consistent feature information provided (F2 is None)
+    with pytest.raises(ValueError):
+        ot.gromov.quantized_fused_gromov_wasserstein(
+            C1, C2, npart1, npart2, p, q, None, None, F1, None, 0.5,
+            'spectral_fused', 'random', log_)
+
+
+@pytest.skip_backend("jax", reason="test very slow with jax backend")
+def test_quantized_gw_samples(nx):
+    n_samples_1 = 15  # number of source samples
+    n_samples_2 = 20  # number of target samples
+
+    rng = np.random.RandomState(0)
+    X1 = rng.uniform(low=0., high=10, size=(n_samples_1, 2))
+    X2 = rng.uniform(low=0., high=10, size=(n_samples_2, 4))
+
+    p = ot.unif(n_samples_1)
+    q = ot.unif(n_samples_2)
+
+    npart1 = 2
+    npart2 = 3
+
+    X1b, X2b, pb, qb = nx.from_numpy(X1, X2, p, q)
+
+    log_tests = [True, False, True]
+    methods = ['random']
+    if sklearn_import:
+        methods += ['kmeans']
+
+    count_mode = 0
+    alpha = 1.
+
+    for method in methods:
+        log_ = log_tests[count_mode]
+        count_mode += 1
+
+        res = ot.gromov.quantized_fused_gromov_wasserstein_samples(
+            X1, X2, npart1, npart2, p, None, None, None, alpha, method, log_)
+
+        resb = ot.gromov.quantized_fused_gromov_wasserstein_samples(
+            X1b, X2b, npart1, npart2, None, qb, None, None, alpha, method, log_)
+
+        if log_:
+            T_global, Ts_local, T, log = res
+            T_globalb, Ts_localb, Tb, logb = resb
+        else:
+            T_global, Ts_local, T = res
+            T_globalb, Ts_localb, Tb = resb
+
+        Tb = nx.to_numpy(Tb)
+        # numpy and backend plans must agree and have marginals (p, q)
+        np.testing.assert_allclose(T, Tb, atol=1e-06)
+        np.testing.assert_allclose(
+            p, Tb.sum(1), atol=1e-06)  # cf convergence gromov
+        np.testing.assert_allclose(
+            q, Tb.sum(0), atol=1e-06)  # cf convergence gromov
+
+        if log_:
+            for key in log.keys():
+                # Float errors differ across backends, so the inner test
+                # T_global[i, j] != 0. may select different partition pairs
+                # for the 1D OT: only keys found in both logs are compared
+                if key in logb.keys():
+                    np.testing.assert_allclose(log[key], logb[key], atol=1e-06)
+
+    # an unknown partitioning method must raise a ValueError
+    with pytest.raises(ValueError):
+        ot.gromov.get_partition_and_representants_samples(
+            X1, npart1, method='unknown_method', random_state=0)
+
+
+@pytest.skip_backend("jax", reason="test very slow with jax backend")
+def test_quantized_fgw_samples(nx):
+    n_samples_1 = 20  # number of source samples
+    n_samples_2 = 30  # number of target samples
+
+    rng = np.random.RandomState(0)
+    X1 = rng.uniform(low=0., high=10, size=(n_samples_1, 2))
+    X2 = rng.uniform(low=0., high=10, size=(n_samples_2, 4))
+
+    F1 = rng.uniform(low=0., high=10, size=(n_samples_1, 3))
+    F2 = rng.uniform(low=0., high=10, size=(n_samples_2, 3))
+
+    p = ot.unif(n_samples_1)
+    q = ot.unif(n_samples_2)
+
+    npart1 = 2
+    npart2 = 3
+
+    X1b, X2b, F1b, F2b, pb, qb = nx.from_numpy(X1, X2, F1, F2, p, q)
+
+    methods = []
+    if sklearn_import:
+        methods += ['kmeans', 'kmeans_fused']
+    methods += ['random']
+
+    alpha = 0.5
+    # npart1 also covers a single partition and more partitions than samples
+    for npart1 in [1, n_samples_1 + 1, 2]:
+        log_tests = [True, False, True]
+        count_mode = 0
+
+        for method in methods:
+            log_ = log_tests[count_mode]
+            count_mode += 1
+
+            res = ot.gromov.quantized_fused_gromov_wasserstein_samples(
+                X1, X2, npart1, npart2, p, None, F1, F2, alpha, method, log_)
+
+            resb = ot.gromov.quantized_fused_gromov_wasserstein_samples(
+                X1b, X2b, npart1, npart2, None, qb, F1b, F2b, alpha, method, log_)
+
+            if log_:
+                T_global, Ts_local, T, log = res
+                T_globalb, Ts_localb, Tb, logb = resb
+            else:
+                T_global, Ts_local, T = res
+                T_globalb, Ts_localb, Tb = resb
+
+            Tb = nx.to_numpy(Tb)
+            # numpy and backend plans must agree and have marginals (p, q)
+            np.testing.assert_allclose(T, Tb, atol=1e-06)
+            np.testing.assert_allclose(
+                p, Tb.sum(1), atol=1e-06)  # cf convergence gromov
+            np.testing.assert_allclose(
+                q, Tb.sum(0), atol=1e-06)  # cf convergence gromov
+
+            if log_:
+                for key in log.keys():
+                    # Float errors differ across backends, so the inner test
+                    # T_global[i, j] != 0. may select different partition pairs
+                    # for the 1D OT: only keys found in both logs are compared
+                    if key in logb.keys():
+                        np.testing.assert_allclose(log[key], logb[key], atol=1e-06)
+
+    # utils functions, reusing the last loop values (npart1=2, method='random')
+    part1b, rep_indices1 = ot.gromov.get_partition_and_representants_samples(
+        X1b, npart1, method=method, random_state=0)
+    part2b, rep_indices2 = ot.gromov.get_partition_and_representants_samples(
+        X2b, npart2, method=method, random_state=0)
+
+    CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_samples(
+        X1b, pb, part1b, rep_indices1, F1b, alpha)
+    CR2b, list_R2b, list_p2b, FR2b = ot.gromov.format_partitioned_samples(
+        X2b, qb, part2b, rep_indices2, F2b, alpha)
+
+    MRb = ot.dist(FR1b, FR2b)
+
+    T_globalb, Ts_localb, _ = ot.gromov.quantized_fused_gromov_wasserstein_partitioned(
+        CR1b, CR2b, list_R1b, list_R2b, list_p1b, list_p2b, MRb, alpha, build_OT=False)
+
+    T_globalb = nx.to_numpy(T_globalb)
+    np.testing.assert_allclose(T_global, T_globalb, atol=1e-06)
+
+    for key in Ts_localb.keys():
+        T_localb = nx.to_numpy(Ts_localb[key])
+        np.testing.assert_allclose(Ts_local[key], T_localb, atol=1e-06)
+
+    # format_partitioned_samples must raise when alpha != 1 and F is None
+    with pytest.raises(ValueError):
+        CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_samples(
+            X1b, pb, part1b, rep_indices1, None, alpha)
+
+    # for non-consistent feature information provided (F1 is None)
+    with pytest.raises(ValueError):
+        ot.gromov.quantized_fused_gromov_wasserstein_samples(
+            X1, X2, npart1, npart2, p, None, None, F2, alpha, 'fused_spectral', log_)