Skip to content

Commit

Permalink
Merge pull request #98 from apriha/develop
Browse files Browse the repository at this point in the history
v2.0.1
  • Loading branch information
apriha committed Sep 25, 2020
2 parents 5acb035 + 826ce27 commit 2baeb62
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 19 deletions.
44 changes: 25 additions & 19 deletions src/snps/io/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ def __call__(self):
d = self.read_myheritage(file, compression)
elif "Living DNA" in first_line:
d = self.read_livingdna(file, compression)
elif "SNP Name rsID Sample.ID Allele1...Top" in first_line:
d = self.read_mapmygenome(file, compression)
elif "SNP Name\trsID" in first_line or "SNP.Name\tSample.ID" in first_line:
d = self.read_mapmygenome(file, compression, first_line)
elif "lineage" in first_line or "snps" in first_line:
d = self.read_snps_csv(file, comments, compression)
elif "Chromosome" in first_line:
Expand Down Expand Up @@ -705,7 +705,7 @@ def parser():

return self.read_helper("LivingDNA", parser)

def read_mapmygenome(self, file, compression):
def read_mapmygenome(self, file, compression, header):
""" Read and parse Mapmygenome file.
https://mapmygenome.in
Expand All @@ -722,22 +722,28 @@ def read_mapmygenome(self, file, compression):
"""

def parser():
df = pd.read_csv(
file,
comment="#",
sep="\t",
na_values="--",
header=0,
index_col=1,
dtype={
"rsID": object,
"Chr": object,
"Position": np.uint32,
"Allele1...Top": object,
"Allele2...Top": object,
},
compression=compression,
)
def parse(rsid_col_name, rsid_col_idx):
# Read the tab-separated Mapmygenome table into a DataFrame.
# rsid_col_name: header name of the identifier column; forced to object dtype.
# rsid_col_idx: zero-based position of that column; used as the index.
# The code below calls this with ("rsID", 1) or ("SNP.Name", 0), chosen by
# whether "rsID" appears in the header line.
# `file` and `compression` are not parameters; they are taken from the
# enclosing scope.
return pd.read_csv(
file,
# Text from "#" to the end of a line is ignored.
comment="#",
sep="\t",
# "--" is parsed as a missing value (NaN).
na_values="--",
# The first row supplies the column names.
header=0,
index_col=rsid_col_idx,
dtype={
rsid_col_name: object,
"Chr": object,
"Position": np.uint32,
"Allele1...Top": object,
"Allele2...Top": object,
},
compression=compression,
)

if "rsID" in header:
df = parse("rsID", 1)
else:
df = parse("SNP.Name", 0)

df["genotype"] = df["Allele1...Top"] + df["Allele2...Top"]
df.rename(columns={"Chr": "chrom", "Position": "pos"}, inplace=True)
Expand Down
9 changes: 9 additions & 0 deletions tests/input/mapmygenome_alt_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
SNP.Name Sample.ID Allele1...Top Allele2...Top GC.Score Sample.Name Sample.Group Sample.Index SNP.Index SNP.Aux Allele1...Forward Allele2...Forward Allele1...Design Allele2...Design Allele1...AB Allele2...AB Allele1...Plus Allele2...Plus Chr Position GT.Score Cluster.Sep SNP ILMN.Strand Customer.Strand Top.Genomic.Sequence Plus.Minus.Strand Theta R X Y X.Raw Y.Raw B.Allele.Freq Log.R.Ratio CNV.Value CNV.Confidence
rs1 0 A A 0 NA NA 0 0 0 A A A A A A A A 1 101 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs2 0 C C 0 NA NA 0 0 0 A A A A A A A A 1 102 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs3 0 G G 0 NA NA 0 0 0 A A A A A A A A 1 103 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs4 0 T T 0 NA NA 0 0 0 A A A A A A A A 1 104 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs5 0 -- -- 0 NA NA 0 0 0 A A A A A A A A 1 105 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs6 0 G C 0 NA NA 0 0 0 A A A A A A A A 1 106 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs7 0 T C 0 NA NA 0 0 0 A A A A A A A A 1 107 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
rs8 0 A T 0 NA NA 0 0 0 A A A A A A A A 1 108 0 1 [A/A] TOP TOP NA + 0 0 0 0 0 0 0 0 NA NA
4 changes: 4 additions & 0 deletions tests/io/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,10 @@ def test_read_mapmygenome(self):
# https://mapmygenome.in
self.run_parsing_tests("tests/input/mapmygenome.txt", "Mapmygenome")

def test_read_mapmygenome_alt_header(self):
# Runs the shared parsing checks on the fixture whose header starts with
# "SNP.Name" and "Sample.ID" and has no "rsID" column.
# NOTE(review): the second argument is presumably the expected detected
# source name; confirm against run_parsing_tests.
# https://mapmygenome.in
self.run_parsing_tests("tests/input/mapmygenome_alt_header.txt", "Mapmygenome")

def test_read_myheritage(self):
# Runs the shared parsing checks on the MyHeritage CSV fixture.
# NOTE(review): the second argument is presumably the expected detected
# source name; confirm against run_parsing_tests.
# https://www.myheritage.com
self.run_parsing_tests("tests/input/myheritage.csv", "MyHeritage")
Expand Down

0 comments on commit 2baeb62

Please sign in to comment.