From 0c5d89553b2fbbf275fdf6d381dd6ff9d9232165 Mon Sep 17 00:00:00 2001 From: Yan Wong Date: Tue, 23 Jul 2024 09:58:07 +0100 Subject: [PATCH] Explicitly convert genotype store to int8, to preserve -1 (missing) values --- tsinfer/algorithm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tsinfer/algorithm.py b/tsinfer/algorithm.py index a7ea9873..7b08c133 100644 --- a/tsinfer/algorithm.py +++ b/tsinfer/algorithm.py @@ -111,7 +111,8 @@ def get_site_genotypes_subset(self, site_id, samples): g[j] = int((byte & mask) != 0) else: for j, u in enumerate(samples): - g[j] = self.genotype_store[start + u] + # NB missing data (-1) is stored as 255 in the genotype_store + g[j] = self.genotype_store[start + u].astype(np.int8) gp = self.get_site_genotypes(site_id) np.testing.assert_array_equal(gp[samples], g) return g @@ -129,6 +130,8 @@ def store_site_genotypes(self, site_id, genotypes): if self.genotype_encoding == constants.GenotypeEncoding.ONE_BIT: assert np.all(genotypes >= 0) and np.all(genotypes <= 1) genotypes = np.packbits(genotypes, bitorder="little") + else: + assert np.all(genotypes <= 127) start = site_id * self.encoded_genotypes_size stop = start + self.encoded_genotypes_size self.genotype_store[start:stop] = genotypes