-
Notifications
You must be signed in to change notification settings - Fork 87
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Papio Anubis genome, recombination map, and demographic model
- Loading branch information
1 parent
263674b
commit 5423db6
Showing
6 changed files
with
338 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
""" | ||
Catalog definitions for PapAnu (Ensembl ID='papio_anubis') | ||
""" | ||
from . import species # noqa: F401 | ||
from . import demographic_models # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import msprime | ||
import numpy as np | ||
import stdpopsim | ||
|
||
# Species object that the demographic model defined below is attached to.
_species = stdpopsim.get_species("PapAnu")
|
||
|
||
def _pap_anu():
    """Build the single-population SMC++ size-history model for Papio anubis.

    The history is piecewise constant: ``sizes[k]`` is the population size
    in effect from ``times[k]`` until ``times[k + 1]``, and the last entry
    applies from ``times[-1]`` onwards. One
    ``msprime.PopulationParametersChange`` event is emitted per
    ``(time, size)`` pair, including the one at time 0.

    Returns:
        stdpopsim.DemographicModel: the model with id
        ``SinglePopSMCpp_1W22`` and a single population ``PAnubis_SNPRC``.
    """
    # the size during the interval times[k] to times[k+1] = sizes[k]
    times = np.array(
        [
            0.0000,
            221.2812,
            463.8092,
            15027.7847,
            39328.5751,
            71092.5441,
            110830.9203,
            186053.2682,
        ]
    )
    sizes = np.array(
        [
            335505.4808,
            120758.1302,
            51822.58297,
            41841.54229,
            30714.33863,
            72998.86202,
            55968.42221,
            93362.02606,
        ]
    )

    # One size change per epoch boundary, all applied to population 0.
    demographic_events = [
        msprime.PopulationParametersChange(time=t, initial_size=sz, population_id=0)
        for sz, t in zip(sizes, times)
    ]
    populations = [
        stdpopsim.Population(
            id="PAnubis_SNPRC",
            description="Papio Anubis population from SNPRC",
        )
    ]

    return stdpopsim.DemographicModel(
        id="SinglePopSMCpp_1W22",
        description="SMC++ estimates of N(t) for Papio Anubis individuals",
        long_description="""
            These estimates were obtained from a sample of Papio Anubis
            individuals from the colony housed at the Southwest National
            Primate Research Center (SNPRC). SMC++ was run with a subset of
            36 individuals from the population.
            """,
        populations=populations,
        citations=[
            stdpopsim.Citation(
                doi="https://doi.org/10.1093/gbe/evac040",
                year=2022,
                author="Wall et. al.",
                reasons={stdpopsim.CiteReason.DEM_MODEL},
            )
        ],
        # citations for generation time and mutation rate can be
        # found in species.py
        generation_time=11,
        # Written as a plain literal: the previous `5.7 * 10e-9` evaluated to
        # 5.7e-8, because `10e-9` is 1e-8 and not 1e-9.
        mutation_rate=5.7e-9,
        demographic_events=demographic_events,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=335505, metadata=populations[0].asdict()
            )
        ],
    )
|
||
|
||
# Build the model once at import time and attach it to the PapAnu species.
_species.add_demographic_model(_pap_anu())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import stdpopsim | ||
|
||
|
||
# Species object that the genetic map defined below is attached to.
_species = stdpopsim.get_species("PapAnu")

# Pyrho-inferred genetic map for Papio anubis. The map is distributed as a
# tarball at `url`, with one file per chromosome named by `file_pattern`.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="Pyrho_PAnubis1.0",
    description="Pyrho inferred genetic map for Papio Anubis",
    long_description="""
        These estimates were obtained from a sample of Papio Anubis
        individuals from the colony housed at the Southwest National
        Primate Research Center (SNPRC).
        """,
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "PapAnu/papio_anubis_genetic_map.tar.gz"
    ),
    # NOTE(review): the checksum is left empty here. Presumably it has to be
    # filled in with the SHA-256 of the tarball at `url` before this map can
    # be downloaded and verified -- confirm and fill in.
    sha256="",
    file_pattern="Pyrho_PAnubis1.0_chr{id}.txt",
    citations=[
        stdpopsim.Citation(
            year=2022,
            author="Wall et. al.",
            doi="https://doi.org/10.1093/gbe/evac040",
            reasons={stdpopsim.CiteReason.GEN_MAP},
        ),
    ],
)
# Register the map on the species.
_species.add_genetic_map(_gm)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# File autogenerated from Ensembl REST API. Do not edit. | ||
# Assembly metadata for the Panubis1.0 assembly. "chromosomes" maps each
# chromosome name ("1".."20", "X", "Y") to its length (presumably in base
# pairs -- confirm against Ensembl) and a list of synonyms, which is empty
# for every chromosome here.
data = {
    "assembly_accession": "GCA_008728515.1",
    "assembly_name": "Panubis1.0",
    "chromosomes": {
        "1": {"length": 218172882, "synonyms": []},
        "2": {"length": 193660750, "synonyms": []},
        "3": {"length": 184919515, "synonyms": []},
        "4": {"length": 182120902, "synonyms": []},
        "5": {"length": 173900761, "synonyms": []},
        "6": {"length": 167138247, "synonyms": []},
        "7": {"length": 161768468, "synonyms": []},
        "8": {"length": 140274886, "synonyms": []},
        "9": {"length": 127591819, "synonyms": []},
        "10": {"length": 126462689, "synonyms": []},
        "11": {"length": 125913696, "synonyms": []},
        "12": {"length": 123343450, "synonyms": []},
        "13": {"length": 106849001, "synonyms": []},
        "14": {"length": 106654974, "synonyms": []},
        "15": {"length": 91985775, "synonyms": []},
        "16": {"length": 91184193, "synonyms": []},
        "17": {"length": 74525926, "synonyms": []},
        "18": {"length": 72894408, "synonyms": []},
        "19": {"length": 72123344, "synonyms": []},
        "20": {"length": 50021108, "synonyms": []},
        "X": {"length": 142711496, "synonyms": []},
        "Y": {"length": 8309886, "synonyms": []},
    },
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import stdpopsim | ||
|
||
from . import genome_data | ||
|
||
# Per-chromosome recombination rates, keyed by chromosome name.
# Autosome values ("1" through "20", in order) come from Wall et al. 2022
# (GBE). No estimate exists for the X chromosome, so it is assigned the mean
# of the other chromosomes' rates; Y is set to zero.
_recombination_rate = {
    str(number): rate
    for number, rate in enumerate(
        (
            9.926379e-09,  # chromosome 1
            9.605435e-09,
            9.022377e-09,
            9.825128e-09,
            9.579804e-09,  # chromosome 5
            1.049788e-08,
            1.118884e-08,
            1.108988e-08,
            1.132883e-08,
            1.175322e-08,  # chromosome 10
            1.184026e-08,
            1.082400e-08,
            1.246772e-08,
            1.274188e-08,
            1.260836e-08,  # chromosome 15
            1.476158e-08,
            1.524101e-08,
            1.368410e-08,
            1.303735e-08,
            1.677201e-08,  # chromosome 20
        ),
        start=1,
    )
}
_recombination_rate["X"] = 1.18898e-08
_recombination_rate["Y"] = 0.0
|
||
|
||
# Reference cited for the genome assembly.
_batra2020 = stdpopsim.Citation(
    doi="https://doi.org/10.1093/gigascience/giaa134",
    year=2020,
    author="Batra et. al.",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Reference cited for the recombination rates.
_wall2022 = stdpopsim.Citation(
    doi="https://doi.org/10.1093/gbe/evac040",
    year=2022,
    author="Wall et. al.",
    reasons={stdpopsim.CiteReason.REC_RATE},
)

# Reference cited for the mutation rate.
_wu2020 = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pbio.3000838",
    year=2020,
    author="Wu et. al.",
    reasons={stdpopsim.CiteReason.MUT_RATE},
)
|
||
# mutation rate from Wu et al. 2020 PLoS Biology
# recombination rates from Wall et al. 2022 GBE
#
# The mutation rate is written as a plain literal: the previous
# `5.7 * 10e-9` evaluated to 5.7e-8, because `10e-9` is 1e-8 and not 1e-9.
_mutation_rate = 5.7e-9

# One Chromosome per entry of the assembly data; the same mutation rate is
# used for every chromosome, the recombination rate is looked up by name.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=_mutation_rate,
            recombination_rate=_recombination_rate[name],
        )
    )
|
||
# Genome built from the chromosomes above; the assembly name and accession
# are read from genome_data. Each citation is tagged with what it is cited for.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        _batra2020.because(stdpopsim.CiteReason.ASSEMBLY),
        _wall2022.because(stdpopsim.CiteReason.REC_RATE),
        _wu2020.because(stdpopsim.CiteReason.MUT_RATE),
    ],
)
|
||
# Species definition for the olive baboon (Papio anubis).
_species = stdpopsim.Species(
    id="PapAnu",
    ensembl_id="papio_anubis",
    name="Papio anubis",
    common_name="Olive baboon",
    genome=_genome,
    # Generation time from Wu et al., section
    # "Inferring split times of humans and baboons".
    generation_time=11,
    # Most recent size in the Wall et al. demographic model, which is
    # included in demographic_models.py in this directory.
    population_size=335505,
    citations=[
        _wall2022.because(stdpopsim.CiteReason.POP_SIZE),
        _wu2020.because(stdpopsim.CiteReason.GEN_TIME),
    ],
)

# Make the species available through stdpopsim.get_species("PapAnu").
stdpopsim.register_species(_species)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import pytest | ||
|
||
import stdpopsim | ||
from tests import test_species | ||
|
||
|
||
class TestSpeciesData(test_species.SpeciesTestBase):
    """Checks on the PapAnu species definition.

    Inherits from ``test_species.SpeciesTestBase`` and adds assertions on
    the identifiers and names of the species. The QC tests are skipped
    placeholders whose expected value ``-1`` has not been filled in yet.
    """

    species = stdpopsim.get_species("PapAnu")

    def test_ensembl_id(self):
        assert self.species.ensembl_id == "papio_anubis"

    def test_name(self):
        assert self.species.name == "Papio anubis"

    def test_common_name(self):
        assert self.species.common_name == "Olive baboon"

    # QC Tests. These tests are performed by another contributor
    # independently referring to the citations provided in the
    # species definition, filling in the appropriate values
    # and deleting the pytest "skip" annotations.
    @pytest.mark.skip("Population size QC not done yet")
    def test_qc_population_size(self):
        # -1 is a placeholder until QC supplies the expected value.
        assert self.species.population_size == -1

    @pytest.mark.skip("Generation time QC not done yet")
    def test_qc_generation_time(self):
        # -1 is a placeholder until QC supplies the expected value.
        assert self.species.generation_time == -1
|
||
|
||
class TestGenomeData(test_species.GenomeTestBase):
    """Checks on the PapAnu genome definition.

    Inherits from ``test_species.GenomeTestBase``. Both parametrized tests
    are currently skipped: every expected rate is the placeholder ``-1``,
    one entry per chromosome, to be replaced during QC.
    """

    genome = stdpopsim.get_species("PapAnu").genome

    @pytest.mark.skip("Recombination rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        # chromosome name -> expected recombination rate (placeholders)
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "19": -1,
            "20": -1,
            "X": -1,
            "Y": -1,
        }.items(),
    )
    def test_recombination_rate(self, name, rate):
        assert rate == pytest.approx(
            self.genome.get_chromosome(name).recombination_rate
        )

    @pytest.mark.skip("Mutation rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        # chromosome name -> expected mutation rate (placeholders)
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "19": -1,
            "20": -1,
            "X": -1,
            "Y": -1,
        }.items(),
    )
    def test_mutation_rate(self, name, rate):
        assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate)