diff --git a/pygenstability/constructors.py b/pygenstability/constructors.py index 5c6ee34..29dcd2d 100644 --- a/pygenstability/constructors.py +++ b/pygenstability/constructors.py @@ -1,7 +1,6 @@ """Quality matrix and null model constructor functions.""" import logging import sys -from functools import lru_cache, partial import numpy as np import scipy.sparse as sp @@ -12,26 +11,16 @@ DTYPE = "float128" -def load_constructor(graph, constructor, with_spectral_gap=True, use_cache=_USE_CACHE): - """Load constructor.""" +def load_constructor(constructor): + """Load a constructor from its name, or as a custom Constructor class.""" if isinstance(constructor, str): try: - constructor = getattr(sys.modules[__name__], "constructor_%s" % constructor) + return getattr(sys.modules[__name__], "constructor_%s" % constructor) except AttributeError as exc: raise Exception("Could not load constructor %s" % constructor) from exc - - if not use_cache: - if hasattr(constructor, "with_spectral_gap"): - return partial(constructor, graph, with_spectral_gap=with_spectral_gap) - return partial(constructor, graph) - - @lru_cache() - def cached_constructor(time): - if hasattr(constructor, "with_spectral_gap"): - return partial(constructor, graph, with_spectral_gap=with_spectral_gap) - return constructor(graph, time) - - return cached_constructor + if not (isinstance(constructor, type) and issubclass(constructor, Constructor)): + raise Exception("Only Constructor class object can be used.") + return constructor def threshold_matrix(matrix, threshold=THRESHOLD): @@ -63,100 +52,156 @@ def get_spectral_gap(laplacian): return spectral_gap -def constructor_linearized(graph, time): - """Constructor for continuous linearized Markov Stability.""" - degrees = np.array(graph.sum(1)).flatten() - _check_total_degree(degrees) +class Constructor: + """Parent constructor class.""" - pi = degrees / degrees.sum() - null_model = np.array([pi, pi]) + def __init__(self, graph, with_spectral_gap=False, kwargs=None): + """Initialise constructor.""" +
self.graph = graph + self.with_spectral_gap = with_spectral_gap + self.spectral_gap = None - quality_matrix = time * (graph / degrees.sum()).astype(DTYPE) + # these two variables can be used in the prepare method + self.partial_quality_matrix = None + self.partial_null_model = None - return quality_matrix, null_model, 1 - time + self.prepare(**(kwargs or {})) + def prepare(self, **kwargs): + """Prepare the constructor with non-time dependent computations.""" + + def get_data(self, time): + """Return quality and null model at given time.""" -def constructor_continuous_combinatorial(graph, time, with_spectral_gap=True): - """Constructor for continuous combinatorial Markov Stability.""" - laplacian, degrees = sp.csgraph.laplacian(graph, return_diag=True, normed=False) - _check_total_degree(degrees) - laplacian /= degrees.mean() - pi = np.ones(graph.shape[0]) / graph.shape[0] - null_model = np.array([pi, pi], dtype=DTYPE) - if with_spectral_gap: - time /= get_spectral_gap(laplacian) +class constructor_linearized(Constructor): + """Constructor for continuous linearized Markov Stability.""" - exp = apply_expm(-time * laplacian) - quality_matrix = sp.diags(pi).dot(exp) + def prepare(self, **kwargs): + """Prepare the constructor with non-time dependent computations.""" + degrees = np.array(self.graph.sum(1)).flatten() + _check_total_degree(degrees) - return quality_matrix, null_model + pi = degrees / degrees.sum() + self.partial_null_model = np.array([pi, pi]) + if self.with_spectral_gap: + laplacian = sp.csgraph.laplacian(self.graph, normed=False) + self.spectral_gap = get_spectral_gap(laplacian) + self.partial_quality_matrix = (self.graph / degrees.sum()).astype(DTYPE) -def constructor_continuous_normalized(graph, time, with_spectral_gap=True): + def get_data(self, time): + """Return quality and null model at given time.""" + if self.with_spectral_gap: + time /= self.spectral_gap + return time * self.partial_quality_matrix, self.partial_null_model, 1 - time + + +class
constructor_continuous_combinatorial(Constructor): + """Constructor for continuous combinatorial Markov Stability.""" + + def prepare(self, **kwargs): + """Prepare the constructor with non-time dependent computations.""" + laplacian, degrees = sp.csgraph.laplacian(self.graph, return_diag=True, normed=False) + _check_total_degree(degrees) + laplacian /= degrees.mean() + pi = np.ones(self.graph.shape[0]) / self.graph.shape[0] + self.partial_null_model = np.array([pi, pi], dtype=DTYPE) + if self.with_spectral_gap: + self.spectral_gap = get_spectral_gap(laplacian) + self.partial_quality_matrix = laplacian + + def get_data(self, time): + """Return quality and null model at given time.""" + if self.with_spectral_gap: + time /= self.spectral_gap + exp = apply_expm(-time * self.partial_quality_matrix) + quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp) + return quality_matrix, self.partial_null_model + + +class constructor_continuous_normalized(Constructor): """Constructor for continuous normalized Markov Stability.""" - laplacian, degrees = sp.csgraph.laplacian(graph, return_diag=True, normed=False) - _check_total_degree(degrees) - normed_laplacian = sp.diags(1.0 / degrees).dot(laplacian) - pi = degrees / degrees.sum() - null_model = np.array([pi, pi], dtype=DTYPE) + def prepare(self, **kwargs): + """Prepare the constructor with non-time dependent computations.""" + laplacian, degrees = sp.csgraph.laplacian(self.graph, return_diag=True, normed=False) + _check_total_degree(degrees) + normed_laplacian = sp.diags(1.0 / degrees).dot(laplacian) - if with_spectral_gap: - time /= get_spectral_gap(normed_laplacian) + pi = degrees / degrees.sum() + self.partial_null_model = np.array([pi, pi], dtype=DTYPE) - exp = apply_expm(-time * normed_laplacian) - quality_matrix = sp.diags(pi).dot(exp) + if self.with_spectral_gap: + self.spectral_gap = get_spectral_gap(normed_laplacian) + self.partial_quality_matrix = normed_laplacian - return quality_matrix, null_model + def
get_data(self, time): + """Return quality and null model at given time.""" + if self.with_spectral_gap: + time /= self.spectral_gap + exp = apply_expm(-time * self.partial_quality_matrix) + quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp) + return quality_matrix, self.partial_null_model -def constructor_signed_modularity(graph, time): +class constructor_signed_modularity(Constructor): """Constructor of signed modularity. Based on (Gomes, Jensen, Arenas, PRE 2009). The time only multiplies the quality matrix (this many not mean anything, use with care!). """ - if np.min(graph) >= 0: - return constructor_linearized(graph, time) - - adj_pos = graph.copy() - adj_pos[graph < 0] = 0.0 - adj_neg = -graph.copy() - adj_neg[graph > 0] = 0.0 - - deg_plus = adj_pos.sum(1).flatten() - deg_neg = adj_neg.sum(1).flatten() - - deg_norm = deg_plus.sum() + deg_neg.sum() - null_model = np.array( - [ - deg_plus / deg_norm, - deg_plus / deg_plus.sum(), - -deg_neg / deg_neg.sum(), - deg_neg / deg_norm, - ] - ) - quality_matrix = time * graph / deg_norm - return quality_matrix, null_model - - -def constructor_directed(graph, time, alpha=0.85): + + def prepare(self, **kwargs): + """Prepare the constructor with non-time dependent computations.""" + adj_pos = self.graph.copy() + adj_pos[self.graph < 0] = 0.0 + adj_neg = -self.graph.copy() + adj_neg[self.graph > 0] = 0.0 + + deg_plus = adj_pos.sum(1).flatten() + deg_neg = adj_neg.sum(1).flatten() + + deg_norm = deg_plus.sum() + deg_neg.sum() + self.partial_null_model = np.array( + [ + deg_plus / deg_norm, + deg_plus / deg_plus.sum(), + -deg_neg / deg_neg.sum(), + deg_neg / deg_norm, + ] + ) + self.partial_quality_matrix = self.graph / deg_norm + + def get_data(self, time): + """Return quality and null model at given time.""" + return time * self.partial_quality_matrix, self.partial_null_model + + +class constructor_directed(Constructor): """Constructor for directed Markov stability.""" - out_degrees =
graph.toarray().sum(axis=1).flatten() - dinv = np.divide(1, out_degrees, where=out_degrees != 0) - N = graph.shape[0] - ones = np.ones((N, N)) / N - M = alpha * np.diag(dinv).dot(graph.toarray()) + ( - (1 - alpha) * np.diag(np.ones(N)) + np.diag(alpha * (dinv == 0.0)) - ).dot(ones) - Q = sp.csr_matrix(M - np.eye(N)) - - exp = apply_expm(time * Q) - pi = abs(sp.linalg.eigs(Q.transpose(), which="SM", k=1)[1][:, 0]) - pi /= pi.sum() - - quality_matrix = sp.diags(pi).dot(exp) - null_model = np.array([pi, pi]) - - return quality_matrix, null_model + + def prepare(self, **kwargs): + """Prepare the constructor with non-time dependent computations.""" + alpha = kwargs.get("alpha", 0.85) + n_nodes = self.graph.shape[0] + ones = np.ones((n_nodes, n_nodes)) / n_nodes + + out_degrees = self.graph.toarray().sum(axis=1).flatten() + dinv = np.divide(1, out_degrees, where=out_degrees != 0) + + self.partial_quality_matrix = sp.csr_matrix( + alpha * np.diag(dinv).dot(self.graph.toarray()) + + ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(ones) + - np.eye(n_nodes) + ) + + pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0]) + pi /= pi.sum() + self.partial_null_model = np.array([pi, pi]) + + def get_data(self, time): + """Return quality and null model at given time.""" + exp = apply_expm(time * self.partial_quality_matrix) + quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp) + return quality_matrix, self.partial_null_model diff --git a/pygenstability/plotting.py b/pygenstability/plotting.py index d781687..f7c1b8b 100644 --- a/pygenstability/plotting.py +++ b/pygenstability/plotting.py @@ -36,7 +36,7 @@ def plot_scan_plotly( # pylint: disable=too-many-branches,too-many-statements,t all_results, ): """Plot results of pygenstability with plotly.""" - from plotly.subplots import make_subplots # pylint: disable=import-outside-toplevel + # from plotly.subplots import make_subplots # pylint:
disable=import-outside-toplevel import plotly.graph_objects as go # pylint: disable=import-outside-toplevel if all_results["run_params"]["log_time"]: @@ -70,7 +70,6 @@ def plot_scan_plotly( # pylint: disable=too-many-branches,too-many-statements,t ) ] - fig = make_subplots(rows=3, cols=1, shared_xaxes=True) ncom = go.Scatter( x=times, y=all_results["number_of_communities"], @@ -132,18 +131,42 @@ def plot_scan_plotly( # pylint: disable=too-many-branches,too-many-statements,t opacity=vi_opacity, ) + opt_criterion = go.Scatter( + x=times, + y=all_results["optimal_scale_criterion"], + mode="lines+markers", + hovertemplate=hovertemplate, + text=text, + name="Optimal Scale Criterion", + yaxis="y5", + xaxis="x3", + marker_color="orange", + ) + + opt_scale = go.Scatter( + x=times[all_results["selected_partitions"]], + y=np.zeros(len(all_results["selected_partitions"])), + mode="markers", + hovertemplate=hovertemplate, + text=text, + name="Optimal Scale", + yaxis="y5", + xaxis="x3", + marker_color="black", + ) + layout = go.Layout( yaxis=dict( title="Stability", titlefont=dict(color="blue"), tickfont=dict(color="blue"), - domain=[0, 0.28], + domain=[0.26, 0.49], ), yaxis2=dict( title=tprime_title, titlefont=dict(color="black"), tickfont=dict(color="black"), - domain=[0.32, 1], + domain=[0.51, 1], side="right", range=[times[0], times[-1]], ), @@ -161,13 +184,21 @@ def plot_scan_plotly( # pylint: disable=too-many-branches,too-many-statements,t tickfont=dict(color="red"), overlaying="y2", ), + yaxis5=dict( + title="Optimal Scale Criterion", + titlefont=dict(color="orange"), + tickfont=dict(color="orange"), + domain=[0, 0.24], + ), xaxis=dict( range=[times[0], times[-1]], ), xaxis2=dict(range=[times[0], times[-1]]), + height=600, + width=800, ) - fig = go.Figure(data=[stab, ncom, vi, ttprime], layout=layout) + fig = go.Figure(data=[stab, ncom, vi, ttprime, opt_criterion, opt_scale], layout=layout) fig.show() diff --git a/pygenstability/pygenstability.py
b/pygenstability/pygenstability.py index 0d3c8b1..a9eeea8 100644 --- a/pygenstability/pygenstability.py +++ b/pygenstability/pygenstability.py @@ -15,6 +15,7 @@ from pygenstability.io import save_results L = logging.getLogger(__name__) +_DTYPE = np.float64 def _get_chunksize(n_comp, pool): @@ -22,8 +23,9 @@ def _get_chunksize(n_comp, pool): return max(1, int(n_comp / pool._processes)) # pylint: disable=protected-access -def _graph_checks(graph): +def _graph_checks(graph, dtype=_DTYPE): """Do some checks and preprocessing of the graph.""" + graph = sp.csr_matrix(graph, dtype=dtype) if sp.csgraph.connected_components(graph)[0] > 1: raise Exception( "Graph not connected, with {} components".format( @@ -82,10 +84,13 @@ def run( n_louvain_VI=20, with_postprocessing=True, with_ttprime=True, - with_spectral_gap=True, + with_spectral_gap=False, result_file="results.pkl", n_workers=4, tqdm_disable=False, + select_scales=True, + window=2, + beta=0.1, ): """Main funtion to compute clustering at various time scales.
@@ -107,6 +112,9 @@ def run( result_file (str): path to the result file n_workers (int): number of workers for multiprocessing tqdm_disable (bool): disable progress bars + select_scales (bool): automatically select scales + window (int): size of window for moving average + beta (float): cut-off parameter for identifying plateau """ run_params = _get_params(locals()) graph = _graph_checks(graph) @@ -117,14 +125,14 @@ def run( log_time=log_time, times=times, ) - constructor = load_constructor(graph, constructor, with_spectral_gap=with_spectral_gap) + constructor = load_constructor(constructor)(graph, with_spectral_gap=with_spectral_gap) pool = multiprocessing.Pool(n_workers) L.info("Start loop over times...") all_results = defaultdict(list) all_results["run_params"] = run_params for time in tqdm(times, disable=tqdm_disable): - quality_matrix, null_model, global_shift = _get_constructor_data(constructor, time) + quality_matrix, null_model, global_shift = constructor.get_data(time) louvain_results = run_several_louvains( quality_matrix, null_model, global_shift, n_louvain, pool ) @@ -148,6 +156,10 @@ def run( L.info("Apply postprocessing...") apply_postprocessing(all_results, pool, constructor=constructor) + if select_scales: + L.info("Identifying optimal scales...") + all_results = identify_optimal_scales(all_results, window=window, beta=beta) + save_results(all_results, filename=result_file) pool.close() @@ -297,3 +309,52 @@ def apply_postprocessing(all_results, pool, constructor, tqdm_disable=False): all_results["number_of_communities"][i] = all_results_raw["number_of_communities"][ best_quality_id ] + + +def identify_optimal_scales(results, window=2, beta=0.1): + """Function to identify optimal scales in Markov Stability.
+ + Args: + results (dict): the results from a Markov Stability calculation + window (int): size of window for moving average + beta (float): cut-off parameter for identifying plateau + """ + # extract ttprime and flip to identify diagonals + ttprime_ = np.flipud(results["ttprime"]) + n_ = ttprime_.shape[0] + + # extract diagonals in lower triangular and identify plateaus + plateau_size = np.zeros(n_) + for i, shift in enumerate(range(-n_ + 1, n_, 2)): + diagonal = np.diag(ttprime_, k=shift) + plateau_size[i] = np.sum(diagonal < beta) + + # compute normalised ttprime + plateau_moving_average = np.convolve(plateau_size, np.ones(window), "valid") / window + plateau_moving_average_norm = 1 - (plateau_moving_average / plateau_moving_average.max()) + ttprime_metric = plateau_moving_average_norm / plateau_moving_average_norm.max() + ttprime_metric = np.append(ttprime_metric, np.zeros(window - 1)) + + # compute normalised VI + nvi_moving_average = ( + np.convolve(results["variation_information"], np.ones(window), "valid") / window + ) + vi_metric = nvi_moving_average / nvi_moving_average.max() + vi_metric = np.append(vi_metric, np.zeros(window - 1)) + + # define criterion + criterion = np.sqrt((ttprime_metric ** 2 + vi_metric ** 2) / 2) + criterion = criterion / criterion.max() + + # find gradient of criterion function + criterion_gradient = np.gradient(criterion) + + # find minima in criterion + index_minima = np.where(criterion_gradient[:-1] * criterion_gradient[1:] < 0)[0] + index_minima = index_minima[criterion_gradient[index_minima] < 0] + + # return with results dict + results["selected_partitions"] = index_minima + results["optimal_scale_criterion"] = criterion + + return results