Skip to content

Commit

Permalink
Concordance, SplitMulti, diploid/unphased warnings (hail-is#3502)
Browse files Browse the repository at this point in the history
* Concordance, SplitMulti, diploid/unphased warnings

* add missing file
  • Loading branch information
jigold authored and jackgoldsmith4 committed Jun 25, 2018
1 parent 1db664c commit a3b4b26
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 7 deletions.
5 changes: 5 additions & 0 deletions python/hail/docs/_templates/req_unphased_diploid_gt.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.. note::

    Requires the dataset to contain only diploid and unphased genotype calls.
    Use :func:`.call` to recode genotype calls or :func:`.null` to set genotype
    calls to missing.
3 changes: 1 addition & 2 deletions python/hail/methods/impex.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,9 @@ def export_plink(dataset, output, call=None, fam_id=None, ind_id=None, pat_id=No
BED, BIM and FAM files.
.. include:: ../_templates/req_tvariant_w_struct_locus.rst
.. include:: ../_templates/req_tstring.rst
.. include:: ../_templates/req_biallelic.rst
.. include:: ../_templates/req_unphased_diploid_gt.rst
Examples
--------
Expand Down
6 changes: 5 additions & 1 deletion python/hail/methods/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def variant_qc(dataset, name='variant_qc') -> MatrixTable:
.. include:: ../_templates/req_biallelic.rst
.. include:: ../_templates/req_tvariant.rst
.. include:: ../_templates/req_unphased_diploid_gt.rst
Examples
--------
Expand Down Expand Up @@ -174,6 +175,8 @@ def concordance(left, right) -> Tuple[List[List[int]], Table, Table]:
.. include:: ../_templates/req_biallelic.rst
.. include:: ../_templates/req_unphased_diploid_gt.rst
.. testsetup::
dataset2 = dataset
Expand All @@ -193,7 +196,8 @@ def concordance(left, right) -> Tuple[List[List[int]], Table, Table]:
This method computes the genotype call concordance (from the entry
field **GT**) between two biallelic variant datasets. It requires
unique sample IDs and performs an inner join on samples (only
samples in both datasets will be considered).
samples in both datasets will be considered). In addition, all genotype
calls must be **diploid** and **unphased**.
It performs an ordered zip join of the variants. That means the
variants of each dataset are sorted, with duplicate variants
Expand Down
19 changes: 19 additions & 0 deletions python/hail/methods/statgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,7 @@ def pc_relate(call_expr, min_individual_maf, *, k=None, scores_expr=None,
PC-Relate method.
.. include:: ../_templates/experimental.rst
.. include:: ../_templates/req_unphased_diploid_gt.rst
Examples
--------
Expand Down Expand Up @@ -1873,6 +1874,11 @@ class SplitMulti(object):
... PL=pl,
... GQ=hl.gq_from_pl(pl))
>>> split_ds = sm.result()
Warning
-------
Any entry and row fields that are not updated will be copied (unchanged)
for each split variant.
"""

@typecheck_method(ds=MatrixTable,
Expand Down Expand Up @@ -1979,6 +1985,17 @@ def result(self):
if not self._entry_fields:
self._entry_fields = {}

unmod_row_fields = set(self._ds.row) - set(self._row_fields) - {'locus', 'alleles', 'a_index', 'was_split'}
unmod_entry_fields = set(self._ds.entry) - set(self._entry_fields)

for name, fds in [('row', unmod_row_fields), ('entry', unmod_entry_fields)]:
if fds:
field = hl.utils.misc.plural('field', len(fds))
word = hl.utils.misc.plural('was', len(fds), 'were')
fds = ', '.join(["'" + f + "'" for f in fds])
warn(f"SplitMulti: The following {name} {field} {word} not updated: {fds}. " \
"Data will be copied (unchanged) for each split variant.")

base, _ = self._ds._process_joins(*itertools.chain(
self._row_fields.values(), self._entry_fields.values()))

Expand Down Expand Up @@ -2938,6 +2955,8 @@ def _local_ld_prune(ds, r2=0.2, window=1000000, memory_per_core=256):
def ld_prune(ds, r2=0.2, window=1000000, memory_per_core=256):
"""Prune variants in linkage disequilibrium.
.. include:: ../_templates/req_unphased_diploid_gt.rst
Notes
-----
Expand Down
16 changes: 12 additions & 4 deletions src/main/scala/is/hail/methods/CalculateConcordance.scala
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,23 @@ object CalculateConcordance {
rview.setGenotype(leftToRightBc.value(li))
comb(li).merge(
if (lrv != null) {
if (lview.hasGT)
Call.unphasedDiploidGtIndex(lview.getGT) + 2
if (lview.hasGT) {
val gt = Call.unphasedDiploidGtIndex(lview.getGT)
if (gt > 2)
fatal(s"'concordance' requires biallelic genotype calls. Found ${ Call.toString(lview.getGT) }.")
gt + 2
}
else
1
} else
0,
if (rrv != null) {
if (rview.hasGT)
Call.unphasedDiploidGtIndex(rview.getGT) + 2
if (rview.hasGT) {
val gt = Call.unphasedDiploidGtIndex(rview.getGT)
if (gt > 2)
fatal(s"'concordance' requires biallelic genotype calls. Found ${ Call.toString(rview.getGT) }.")
gt + 2
}
else
1
} else
Expand Down

0 comments on commit a3b4b26

Please sign in to comment.