Skip to content

Commit

Permalink
add pyproject.toml; black formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
afrendeiro committed Jun 1, 2021
1 parent e0cb97d commit 4dafc4d
Show file tree
Hide file tree
Showing 5 changed files with 352 additions and 424 deletions.
152 changes: 41 additions & 111 deletions imc/data_models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _detect_samples(self) -> DataFrame:
return pd.DataFrame()

content = (
[x for x in self.processed_dir.iterdir() if x.is_dir()]
[x for x in list(self.processed_dir.iterdir()) if x.is_dir()]
if self.subfolder_per_sample
else self.processed_dir.glob("*_full.tiff")
)
Expand All @@ -208,11 +208,7 @@ def _initialize_project_from_annotation(
def cols_with_unique_values(dfs: DataFrame) -> set:
return {col for col in dfs if len(dfs[col].unique()) == 1}

metadata = (
self.metadata
if self.metadata is not None
else self._detect_samples()
)
metadata = self.metadata if self.metadata is not None else self._detect_samples()

if metadata.empty:
return
Expand All @@ -236,9 +232,7 @@ def cols_with_unique_values(dfs: DataFrame) -> set:

sample = IMCSample(
sample_name=row[self.sample_name_attribute],
root_dir=(
self.processed_dir / str(row[self.sample_name_attribute])
)
root_dir=(self.processed_dir / str(row[self.sample_name_attribute]))
if self.subfolder_per_sample
else self.processed_dir,
subfolder_per_sample=self.subfolder_per_sample,
Expand All @@ -264,9 +258,7 @@ def rois(self) -> List["ROI"]:
@rois.setter
def rois(self, rois: List[Union[str, "ROI"]]):
for sample in self.samples:
sample.rois = [
r for r in sample.rois if r.name in rois or r in rois
]
sample.rois = [r for r in sample.rois if r.name in rois or r in rois]

@property
def n_samples(self) -> int:
Expand All @@ -278,21 +270,15 @@ def n_rois(self) -> int:

@property
def channel_labels(self) -> Union[Series, DataFrame]:
return pd.concat(
[sample.channel_labels for sample in self.samples], axis=1
)
return pd.concat([sample.channel_labels for sample in self.samples], axis=1)

@property
def channel_names(self) -> Union[Series, DataFrame]:
return pd.concat(
[sample.channel_names for sample in self.samples], axis=1
)
return pd.concat([sample.channel_names for sample in self.samples], axis=1)

@property
def channel_metals(self) -> Union[Series, DataFrame]:
return pd.concat(
[sample.channel_metals for sample in self.samples], axis=1
)
return pd.concat([sample.channel_metals for sample in self.samples], axis=1)

def _get_rois(
self, samples: Optional[List["IMCSample"]], rois: Optional[List["ROI"]]
Expand Down Expand Up @@ -361,9 +347,7 @@ def plot_channels(
if save:
output_dir.mkdir(exist_ok=True)
channels_str = ",".join(channels)
fig_file = output_dir / ".".join(
[self.name, f"all_rois.{channels_str}.pdf"]
)
fig_file = output_dir / ".".join([self.name, f"all_rois.{channels_str}.pdf"])
if per_sample:
for sample in samples or self.samples:
fig = sample.plot_channels(channels, **kwargs)
Expand All @@ -377,17 +361,11 @@ def plot_channels(

i = 0
j = 1 if merged else len(channels)
n, m = (
get_grid_dims(len(rois))
if merged
else get_grid_dims(len(rois) * j)
)
n, m = get_grid_dims(len(rois)) if merged else get_grid_dims(len(rois) * j)
fig, axes = plt.subplots(n, m, figsize=(4 * m, 4 * n))
axes = axes.flatten()
for roi in rois:
roi.plot_channels(
channels, axes=axes[i : i + j], merged=merged, **kwargs
)
roi.plot_channels(channels, axes=axes[i : i + j], merged=merged, **kwargs)
i += j
for _ax in axes[i:]:
_ax.axis("off")
Expand Down Expand Up @@ -427,16 +405,13 @@ def plot_probabilities_and_segmentation(
for i, roi in enumerate(rois):
roi.plot_probabilities_and_segmentation(axes=axes[i])
plot_file = output_dir / (
self.name
+ ".all_rois.plot_probabilities_and_segmentation.all_rois.svg"
self.name + ".all_rois.plot_probabilities_and_segmentation.all_rois.svg"
)
fig.savefig(plot_file, **FIG_KWS)

def plot_cell_types(
self,
cell_type_combinations: Optional[
Union[str, List[Tuple[str, str]]]
] = None,
cell_type_combinations: Optional[Union[str, List[Tuple[str, str]]]] = None,
cell_type_assignments: Optional[DataFrame] = None,
palette: Optional[str] = "tab20",
samples: Optional[List["IMCSample"]] = None,
Expand All @@ -451,9 +426,7 @@ def plot_cell_types(
fig, axes = plt.subplots(n, m, figsize=(3 * m, 3 * n), squeeze=False)
patches: List[Patch] = list()
for i, sample in enumerate(samples):
for j, roi in enumerate(
[roi for roi in rois if roi in sample.rois]
):
for j, roi in enumerate([roi for roi in rois if roi in sample.rois]):
patches += roi.plot_cell_types(
cell_type_combinations=cell_type_combinations,
cell_type_assignments=cell_type_assignments,
Expand Down Expand Up @@ -523,9 +496,7 @@ def channel_summary(
({}, "", ""),
(def_kwargs, ".z_score", " (Z-score)"),
]:
plot_file = (
output_prefix + f".mean_per_channel.clustermap{label}.svg"
)
plot_file = output_prefix + f".mean_per_channel.clustermap{label}.svg"
grid = sns.clustermap(
res,
cbar_kws=dict(label=red_func.capitalize() + cbar_label),
Expand Down Expand Up @@ -558,9 +529,7 @@ def image_summary(
name="cell density",
)
lacunarities = pd.Series(
parmap.map(
lacunarity, [roi.cell_mask_o for roi in rois], pm_pbar=True
),
parmap.map(lacunarity, [roi.cell_mask_o for roi in rois], pm_pbar=True),
index=roi_names,
name="lacunarity",
)
Expand All @@ -574,9 +543,7 @@ def image_summary(
name="fractal_dimension",
)

morphos = pd.DataFrame(
[densities * 1e4, lacunarities, fractal_dimensions]
).T
morphos = pd.DataFrame([densities * 1e4, lacunarities, fractal_dimensions]).T

def channel_correlation(
self,
Expand All @@ -595,10 +562,7 @@ def channel_correlation(
# handling different panels based on channel name,
# which results in concatenating dfs with duplicated names in their indices
res = pd.concat(
[
x.groupby(level=0).mean().T.groupby(level=0).mean().T
for x in _res
]
[x.groupby(level=0).mean().T.groupby(level=0).mean().T for x in _res]
)
xcorr = res.groupby(level=0).mean().fillna(0)
labels = xcorr.index
Expand All @@ -617,12 +581,9 @@ def channel_correlation(
yticklabels=True,
cbar_kws=dict(label="Pearson correlation"),
)
grid.ax_col_dendrogram.set_title(
"Pairwise channel correlation\n(pixel level)"
)
grid.ax_col_dendrogram.set_title("Pairwise channel correlation\n(pixel level)")
grid.savefig(
self.results_dir / "qc" / self.name
+ ".channel_pairwise_correlation.svg",
self.results_dir / "qc" / self.name + ".channel_pairwise_correlation.svg",
**FIG_KWS,
)
grid.fig.grid = grid
Expand Down Expand Up @@ -696,9 +657,7 @@ def quantify_cell_intensity(
"""
from imc.operations import quantify_cell_intensity_rois

return quantify_cell_intensity_rois(
self._get_rois(samples, rois), **kwargs
)
return quantify_cell_intensity_rois(self._get_rois(samples, rois), **kwargs)

def quantify_cell_morphology(
self,
Expand All @@ -711,9 +670,7 @@ def quantify_cell_morphology(
"""
from imc.operations import quantify_cell_morphology_rois

return quantify_cell_morphology_rois(
self._get_rois(samples, rois), **kwargs
)
return quantify_cell_morphology_rois(self._get_rois(samples, rois), **kwargs)

def cluster_cells(
self,
Expand All @@ -733,10 +690,7 @@ def cluster_cells(

if "quantification" not in kwargs and self.quantification is not None:
kwargs["quantification"] = self.quantification
if (
"cell_type_channels" not in kwargs
and self.panel_metadata is not None
):
if "cell_type_channels" not in kwargs and self.panel_metadata is not None:
if "cell_type" in self.panel_metadata.columns:
kwargs["cell_type_channels"] = self.panel_metadata.query(
"cell_type == 1"
Expand Down Expand Up @@ -794,9 +748,7 @@ def set_clusters(
)[
"cluster"
] # .astype(str)
assert isinstance(
clusters.index, pd.MultiIndex
), "Series index must be "
assert isinstance(clusters.index, pd.MultiIndex), "Series index must be "
assert clusters.index.names == id_cols
self._clusters = clusters
for sample in samples or self.samples:
Expand All @@ -817,9 +769,7 @@ def label_clusters(
prefix = self.results_dir / "single_cell" / self.name
h5ad_file = Path(h5ad_file or prefix + ".single_cell.processed.h5ad")
output_prefix = Path(output_prefix or prefix + ".cell_type_reference")
new_labels = derive_reference_cell_type_labels(
h5ad_file, output_prefix, **kwargs
)
new_labels = derive_reference_cell_type_labels(h5ad_file, output_prefix, **kwargs)
self._rename_clusters(new_labels.to_dict())

def _rename_clusters(self, new_labels: dict, save: bool = True):
Expand Down Expand Up @@ -863,9 +813,7 @@ def sample_comparisons(
.rename_axis("sample")
.reset_index()
)
sample_groups = sample_df.groupby(sample_attributes)["sample"].apply(
set
)
sample_groups = sample_df.groupby(sample_attributes)["sample"].apply(set)
sample_roi_df = pd.DataFrame(
[(roi.name, roi.sample.name) for roi in rois],
columns=["roi", "sample"],
Expand Down Expand Up @@ -903,9 +851,7 @@ def sample_comparisons(
# # absolute
# # fraction of total
cluster_df = (
cluster_counts.reset_index()
.merge(sample_df)
.sort_values(sample_attributes)
cluster_counts.reset_index().merge(sample_df).sort_values(sample_attributes)
)
cluster_df["cell_perc"] = cluster_df.groupby("roi")["cell_count"].apply(
lambda x: (x / x.sum()) * 100
Expand All @@ -932,9 +878,7 @@ def sample_comparisons(
mu = mannwhitneyu(a, b)
except ValueError:
mu = (np.nan, np.nan)
_res.append(
[attribute, channel, group1, group2, *means, *mu]
)
_res.append([attribute, channel, group1, group2, *means, *mu])
cols = [
"attribute",
"channel",
Expand All @@ -947,9 +891,9 @@ def sample_comparisons(
"p_value",
]
channel_stats = pd.DataFrame(_res, columns=cols)
channel_stats["p_adj"] = multipletests(
channel_stats["p_value"], method="fdr_bh"
)[1]
channel_stats["p_adj"] = multipletests(channel_stats["p_value"], method="fdr_bh")[
1
]

# # # remove duplication due to lazy itertools.permutations
channel_stats["abs_log2_fold"] = channel_stats["log2_fold"].abs()
Expand Down Expand Up @@ -993,9 +937,7 @@ def sample_comparisons(
mu = mannwhitneyu(a, b)
except ValueError:
mu = (np.nan, np.nan)
_res.append(
[attribute, cluster, group1, group2, *means, *mu]
)
_res.append([attribute, cluster, group1, group2, *means, *mu])
cols = [
"attribute",
"cluster",
Expand All @@ -1008,9 +950,9 @@ def sample_comparisons(
"p_value",
]
cluster_stats = pd.DataFrame(_res, columns=cols)
cluster_stats["p_adj"] = multipletests(
cluster_stats["p_value"], method="fdr_bh"
)[1]
cluster_stats["p_adj"] = multipletests(cluster_stats["p_value"], method="fdr_bh")[
1
]

# # # remove duplication due to lazy itertools.permutations
cluster_stats["abs_log2_fold"] = cluster_stats["log2_fold"].abs()
Expand All @@ -1030,8 +972,7 @@ def sample_comparisons(
cluster_stats.loc[i] = row
# # # save
cluster_stats.to_csv(
output_prefix
+ f"cell_type_abundance.testing_between_attributes.csv",
output_prefix + f"cell_type_abundance.testing_between_attributes.csv",
index=False,
)

Expand All @@ -1054,9 +995,7 @@ def sample_comparisons(
for axs in axes[i, (0, 1)]:
sns.barplot(**kwargs, hue=attribute, ax=axs)
axes[i, 1].set_xscale("log")
for axs, lab in zip(
axes[i, :], ["Channel mean", "Channel mean (log)"]
):
for axs, lab in zip(axes[i, :], ["Channel mean", "Channel mean (log)"]):
axs.set_xlabel(lab)
fig.savefig(
output_prefix + f"channel_mean.by_{attribute}.barplot.svg",
Expand All @@ -1066,9 +1005,7 @@ def sample_comparisons(
# # # clusters
# # # # plot once for all cell types, another time excluding rare cell types
n = len(sample_attributes)
kwargs = dict(
y="cluster", orient="horiz", ci="sd"
) # , estimator=np.std)
kwargs = dict(y="cluster", orient="horiz", ci="sd") # , estimator=np.std)
for label, pl_df in [
("all_clusters", cluster_df),
("filtered_clusters", filtered_cluster_df),
Expand Down Expand Up @@ -1099,8 +1036,7 @@ def sample_comparisons(
):
axs.set_xlabel(lab)
fig.savefig(
output_prefix
+ f"cell_type_abundance.by_{attribute}.barplot.svg",
output_prefix + f"cell_type_abundance.by_{attribute}.barplot.svg",
**FIG_KWS,
)

Expand Down Expand Up @@ -1229,9 +1165,7 @@ def sample_comparisons(
xticklabels=True,
yticklabels=True,
)
grid.savefig(
output_prefix + "cell_type_abundance.by_area.svg", **FIG_KWS
)
grid.savefig(output_prefix + "cell_type_abundance.by_area.svg", **FIG_KWS)

grid = sns.clustermap(
cluster_densities,
Expand All @@ -1244,9 +1178,7 @@ def sample_comparisons(
xticklabels=True,
yticklabels=True,
)
grid.savefig(
output_prefix + "cell_type_abundance.by_area.zscore.svg", **FIG_KWS
)
grid.savefig(output_prefix + "cell_type_abundance.by_area.zscore.svg", **FIG_KWS)

def measure_adjacency(
self,
Expand Down Expand Up @@ -1300,9 +1232,7 @@ def measure_adjacency(

v = np.percentile(melted["value"].abs(), 95)
n, m = get_grid_dims(len(freqs))
fig, axes = plt.subplots(
n, m, figsize=(m * 5, n * 5), sharex=True, sharey=True
)
fig, axes = plt.subplots(n, m, figsize=(m * 5, n * 5), sharex=True, sharey=True)
axes = axes.flatten()
i = -1
for i, (dfs, roi) in enumerate(zip(freqs, rois)):
Expand Down
Loading

0 comments on commit 4dafc4d

Please sign in to comment.