Skip to content

Commit

Permalink
Merge pull request #33 from msk-access/hotfix-plotting-bug
Browse files Browse the repository at this point in the history
Add plotting tests; fix bug in genotype heatmap
  • Loading branch information
murphycj2 authored Mar 25, 2021
2 parents 09abf3f + e574aa6 commit eab03fc
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ FROM python:3.6-slim
ARG BUILD_DATE
ARG BUILD_VERSION
ARG LICENSE="Apache-2.0"
ARG BIOMETRICS_VERSION=0.2.2
ARG BIOMETRICS_VERSION=0.2.4
ARG VCS_REF
################## METADATA ########################
LABEL org.opencontainers.image.vendor="MSKCC"
Expand Down
2 changes: 1 addition & 1 deletion biometrics/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.3
0.2.4
34 changes: 18 additions & 16 deletions biometrics/genotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,15 @@ def _plot_heatmap(self, data, outdir, name, title="Discordance calculations betw
customdata=data.to_numpy(),
hovertemplate='<b>Reference sample:</b> %{customdata[0]}' +
'<br><b>Query sample:</b> %{customdata[1]}' +
'<br><b>Homozygous count in reference:</b> %{customdata[3]}' +
'<br><b>Total match count:</b> %{customdata[4]}' +
'<br><b>Homozygous match count:</b> %{customdata[5]}' +
'<br><b>Heterozygous match count:</b> %{customdata[6]}' +
'<br><b>Homozygous mismatch count:</b> %{customdata[7]}' +
'<br><b>Heterozygous mismatch count:</b> %{customdata[8]}' +
'<br><b>Discordance rate:</b> %{customdata[9]}' +
'<br><b>Status:</b> %{customdata[12]}' +
'<br><b>Count of common sites:</b> %{customdata[4]}' +
'<br><b>Homozygous count in reference:</b> %{customdata[5]}' +
'<br><b>Total match count:</b> %{customdata[6]}' +
'<br><b>Homozygous match count:</b> %{customdata[7]}' +
'<br><b>Heterozygous match count:</b> %{customdata[8]}' +
'<br><b>Homozygous mismatch count:</b> %{customdata[9]}' +
'<br><b>Heterozygous mismatch count:</b> %{customdata[10]}' +
'<br><b>Discordance rate:</b> %{customdata[11]}' +
'<br><b>Status:</b> %{customdata[14]}' +
'<extra></extra>',
zmin=self.zmin,
zmax=self.zmax,
Expand Down Expand Up @@ -96,18 +97,20 @@ def plot(self, data, outdir):

# make plot for comparing input samples with each other

data_sub = data[~data['DatabaseComparison']].copy()
data_sub = data[~data['IsInputToDatabaseComparison']].copy()
del data_sub['IsInputToDatabaseComparison']
data_sub['DiscordanceRate'] = data_sub['DiscordanceRate'].map(
lambda x: round(x, 4))

if data_sub.shape[0] > 1:
self._plot_heatmap(
data_sub, outdir, name='genotype_comparison_input_only.html',
data_sub, outdir, name='genotype_comparison_input.html',
title="Discordance calculations between input samples")

# make plot for comparing input samples with database samples

data_sub = data[data['DatabaseComparison']].copy()
data_sub = data[data['IsInputToDatabaseComparison']].copy()
del data_sub['IsInputToDatabaseComparison']
data_sub['DiscordanceRate'] = data_sub['DiscordanceRate'].map(
lambda x: round(x, 4))

Expand Down Expand Up @@ -238,7 +241,7 @@ def compare_samples(self, samples):
samples_input, samples_input, samples)

for i in range(len(results)):
results[i]['DatabaseComparison'] = True
results[i]['IsInputToDatabaseComparison'] = False
comparisons += results

# for each input sample, compare with all the samples in the db
Expand All @@ -248,14 +251,15 @@ def compare_samples(self, samples):
samples_input, samples_db, samples)

for i in range(len(results)):
results[i]['DatabaseComparison'] = True
results[i]['IsInputToDatabaseComparison'] = True
comparisons += results

comparisons = pd.DataFrame(comparisons)

# compute discordance rate

comparisons['DiscordanceRate'] = comparisons['HomozygousMismatch'] / (comparisons['HomozygousInRef'] + EPSILON)

# data['DiscordanceRate'] = data['DiscordanceRate'].map(lambda x: round(x, 6))
comparisons.loc[comparisons['HomozygousInRef'] < 10, 'DiscordanceRate'] = np.nan

Expand All @@ -277,9 +281,7 @@ def compare_samples(self, samples):
~comparisons['Matched'] & ~comparisons['ExpectedMatch'], 'Status'] = "Expected Mismatch"

self.comparisons = comparisons[[
'ReferenceSample', 'ReferenceSampleGroup', 'QuerySample', 'QuerySampleGroup',
'CountOfCommonSites', 'HomozygousInRef', 'TotalMatch', 'HomozygousMatch',
'HeterozygousMatch', 'HomozygousMismatch',
'ReferenceSample', 'ReferenceSampleGroup', 'QuerySample', 'QuerySampleGroup', 'IsInputToDatabaseComparison', 'CountOfCommonSites', 'HomozygousInRef', 'TotalMatch', 'HomozygousMatch', 'HeterozygousMatch', 'HomozygousMismatch',
'HeterozygousMismatch', 'DiscordanceRate', 'Matched',
'ExpectedMatch', 'Status']]

Expand Down
31 changes: 29 additions & 2 deletions tests/test_biometrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from biometrics.extract import Extract
from biometrics.genotype import Genotyper
from biometrics.sex_mismatch import SexMismatch
from biometrics.minor_contamination import MinorContamination
from biometrics.major_contamination import MajorContamination


CUR_DIR = os.path.dirname(os.path.abspath(__file__))

Expand Down Expand Up @@ -224,7 +227,7 @@ def setUp(self, mock_args):

self.args = get_args()

def test_sample_minor_contamination(self):
def test_minor_contamination(self):
samples = get_samples(self.args, extraction_mode=False)
samples = run_minor_contamination(self.args, samples)

Expand All @@ -236,14 +239,26 @@ def test_sample_minor_contamination(self):
samples['test_sample1'].metrics['minor_contamination']['n_contributing_sites'], 1,
msg='Count of contributing sites for minor contamination is wrong.')

def test_sample_major_contamination(self):
def test_plot_minor_contamination(self):
# Smoke test for the minor-contamination plotting path: there are no
# assertions, so the test passes as long as estimate() and plot() do not raise.
samples = get_samples(self.args, extraction_mode=False)
minor_contamination = MinorContamination(threshold=self.args.minor_threshold)
# estimate() returns the samples object that is then handed to plot().
samples = minor_contamination.estimate(samples)
# plot() is given self.args.outdir; presumably it writes its output there — TODO confirm.
minor_contamination.plot(samples, self.args.outdir)

def test_major_contamination(self):
samples = get_samples(self.args, extraction_mode=False)
samples = run_major_contamination(self.args, samples)

self.assertAlmostEqual(
samples['test_sample1'].metrics['major_contamination']['val'], 0.2,
places=1, msg='Major contamination is wrong.')

def test_plot_major_contamination(self):
# Smoke test for the major-contamination plotting path: there are no
# assertions, so the test passes as long as estimate() and plot() do not raise.
samples = get_samples(self.args, extraction_mode=False)
# NOTE(review): the threshold passed here is args.minor_threshold even though
# this constructs MajorContamination — possibly a copy-paste slip; confirm
# whether a major-contamination threshold on args was intended.
major_contamination = MajorContamination(threshold=self.args.minor_threshold)
# estimate() returns the samples object that is then handed to plot().
samples = major_contamination.estimate(samples)
# plot() is given self.args.outdir; presumably it writes its output there — TODO confirm.
major_contamination.plot(samples, self.args.outdir)

def test_genotyper(self):
samples = get_samples(self.args, extraction_mode=False)

Expand All @@ -258,6 +273,18 @@ def test_genotyper(self):
self.assertEqual(len(data), 4, msg='There were not four comparisons done.')
self.assertEqual(set(data['Status']), set(['Expected Match']), msg='All sample comparisons were expected to match.')

def test_genotyper_plot(self):
# Smoke test for Genotyper.plot: runs the sample comparison and passes the
# result to plot(). There are no assertions, so the test passes as long as
# nothing raises.
samples = get_samples(self.args, extraction_mode=False)

# Every Genotyper option is taken from the parsed test arguments.
genotyper = Genotyper(
no_db_compare=self.args.no_db_compare,
discordance_threshold=self.args.discordance_threshold,
threads=self.args.threads,
zmin=self.args.zmin,
zmax=self.args.zmax)
# The value returned by compare_samples() is passed straight to plot().
data = genotyper.compare_samples(samples)
# plot() is given self.args.outdir; presumably it writes its output there — TODO confirm.
genotyper.plot(data, self.args.outdir)

def test_sexmismatch(self):
samples = get_samples(self.args, extraction_mode=False)

Expand Down

0 comments on commit eab03fc

Please sign in to comment.