Skip to content

Commit

Permalink
Added Papio Anubis genome, recombination map, and demographic model
Browse files Browse the repository at this point in the history
  • Loading branch information
saurabhbelsare authored and petrelharp committed May 16, 2022
1 parent 263674b commit 5423db6
Show file tree
Hide file tree
Showing 6 changed files with 338 additions and 0 deletions.
5 changes: 5 additions & 0 deletions stdpopsim/catalog/PapAnu/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Catalog definitions for PapAnu (Ensembl ID='papio_anubis')
"""
from . import species # noqa: F401
from . import demographic_models # noqa: F401
78 changes: 78 additions & 0 deletions stdpopsim/catalog/PapAnu/demographic_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import msprime
import numpy as np
import stdpopsim

# Look up the PapAnu species object; the demographic model defined below is
# attached to it at the bottom of this module.
_species = stdpopsim.get_species("PapAnu")


def _pap_anu():
    """
    Build the single-population SMC++ demographic model for Papio anubis.

    The population-size history is piecewise constant: for every epoch ``k``
    one ``msprime.PopulationParametersChange`` event sets the size of
    population 0 to ``sizes[k]`` at time ``times[k]``.

    :return: A ``stdpopsim.DemographicModel`` with id ``SinglePopSMCpp_1W22``
        and a single population, ``PAnubis_SNPRC``.
    """
    # The size during the interval times[k] to times[k+1] is sizes[k].
    # NOTE(review): times are presumably in generations (msprime's time
    # unit) -- confirm against the SMC++ output they were taken from.
    times = np.array(
        [
            0.0000,
            221.2812,
            463.8092,
            15027.7847,
            39328.5751,
            71092.5441,
            110830.9203,
            186053.2682,
        ]
    )
    sizes = np.array(
        [
            335505.4808,
            120758.1302,
            51822.58297,
            41841.54229,
            30714.33863,
            72998.86202,
            55968.42221,
            93362.02606,
        ]
    )

    # One size-change event per epoch, all applied to population 0.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=t, initial_size=sz, population_id=0
        )
        for sz, t in zip(sizes, times)
    ]

    populations = [
        stdpopsim.Population(
            id="PAnubis_SNPRC",
            description="Papio Anubis population from SNPRC",
        )
    ]

    return stdpopsim.DemographicModel(
        id="SinglePopSMCpp_1W22",
        description="SMC++ estimates of N(t) for Papio Anubis individuals",
        long_description=(
            "\n"
            "These estimates were obtained from a sample of Papio Anubis\n"
            "individuals from the colony housed at the Southwest National\n"
            "Primate Research Center (SNPRC). SMC++ was run with a subset of\n"
            "36 individuals from the population.\n"
        ),
        populations=populations,
        citations=[
            stdpopsim.Citation(
                doi="https://doi.org/10.1093/gbe/evac040",
                year=2022,
                author="Wall et. al.",
                reasons={stdpopsim.CiteReason.DEM_MODEL},
            )
        ],
        # Citations for generation time and mutation rate can be found in
        # species.py; the values here must agree with the ones used there.
        generation_time=11,
        # Written as a plain literal on purpose: the earlier spelling
        # `5.7 * 10e-9` evaluates to 5.7e-8 (10e-9 == 1e-8), ten times the
        # intended 5.7e-9.
        mutation_rate=5.7e-9,
        demographic_events=demographic_events,
        population_configurations=[
            msprime.PopulationConfiguration(
                # Most recent epoch size, truncated to an integer; the
                # time-0 event above sets the exact value sizes[0].
                initial_size=335505,
                metadata=populations[0].asdict(),
            )
        ],
    )


# Build the model once, when this module is imported, and attach it to the
# PapAnu species.
_species.add_demographic_model(_pap_anu())
30 changes: 30 additions & 0 deletions stdpopsim/catalog/PapAnu/genetic_maps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import stdpopsim


# Look up the PapAnu species object so the genetic map can be attached to it.
_species = stdpopsim.get_species("PapAnu")

# Pyrho-inferred genetic map for Papio anubis, downloaded as a tarball from
# `url`.
# NOTE(review): the package __init__.py shown alongside this file imports only
# `species` and `demographic_models`, so this module is presumably not executed
# on package import -- confirm whether leaving it out is intentional.
_gm = stdpopsim.GeneticMap(
species=_species,
id="Pyrho_PAnubis1.0",
description="Pyrho inferred genetic map for Papio Anubis",
long_description="""
These estimates were obtained from a sample of Papio Anubis
individuals from the colony housed at the Southwest National
Primate Research Center (SNPRC).
""",
url=(
"https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
"PapAnu/papio_anubis_genetic_map.tar.gz"
),
# NOTE(review): the checksum is empty -- presumably a placeholder. Fill in the
# SHA-256 of the archive at `url` before this map is enabled.
sha256="",
# NOTE(review): `{id}` is presumably replaced by each chromosome id to locate
# the per-chromosome file inside the archive -- confirm.
file_pattern="Pyrho_PAnubis1.0_chr{id}.txt",
citations=[
stdpopsim.Citation(
year=2022,
author="Wall et. al.",
doi="https://doi.org/10.1093/gbe/evac040",
reasons={stdpopsim.CiteReason.GEN_MAP},
),
],
)
# Attach the map to the species.
_species.add_genetic_map(_gm)
29 changes: 29 additions & 0 deletions stdpopsim/catalog/PapAnu/genome_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# File autogenerated from Ensembl REST API. Do not edit.
# Assembly identifiers and per-chromosome lengths for the Panubis1.0 assembly:
# chromosomes 1-20, X and Y, none of which has synonyms.
# NOTE(review): lengths are presumably in base pairs -- confirm against Ensembl.
data = {
"assembly_accession": "GCA_008728515.1",
"assembly_name": "Panubis1.0",
"chromosomes": {
"1": {"length": 218172882, "synonyms": []},
"2": {"length": 193660750, "synonyms": []},
"3": {"length": 184919515, "synonyms": []},
"4": {"length": 182120902, "synonyms": []},
"5": {"length": 173900761, "synonyms": []},
"6": {"length": 167138247, "synonyms": []},
"7": {"length": 161768468, "synonyms": []},
"8": {"length": 140274886, "synonyms": []},
"9": {"length": 127591819, "synonyms": []},
"10": {"length": 126462689, "synonyms": []},
"11": {"length": 125913696, "synonyms": []},
"12": {"length": 123343450, "synonyms": []},
"13": {"length": 106849001, "synonyms": []},
"14": {"length": 106654974, "synonyms": []},
"15": {"length": 91985775, "synonyms": []},
"16": {"length": 91184193, "synonyms": []},
"17": {"length": 74525926, "synonyms": []},
"18": {"length": 72894408, "synonyms": []},
"19": {"length": 72123344, "synonyms": []},
"20": {"length": 50021108, "synonyms": []},
"X": {"length": 142711496, "synonyms": []},
"Y": {"length": 8309886, "synonyms": []},
},
}
97 changes: 97 additions & 0 deletions stdpopsim/catalog/PapAnu/species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import stdpopsim

from . import genome_data

# Per-chromosome recombination rates, keyed by chromosome id.
# Rates for the autosomes (chromosomes 1-20) are obtained from
# Wall et al. 2022 GBE.
# Recombination for the X chromosome has not been estimated; it is
# assigned the unweighted mean of the 20 autosome rates (1.18898e-08).
# The Y chromosome is assigned a rate of 0.
# NOTE(review): units are presumably per base pair per generation -- confirm.
_recombination_rate = {
"1": 9.926379e-09,
"2": 9.605435e-09,
"3": 9.022377e-09,
"4": 9.825128e-09,
"5": 9.579804e-09,
"6": 1.049788e-08,
"7": 1.118884e-08,
"8": 1.108988e-08,
"9": 1.132883e-08,
"10": 1.175322e-08,
"11": 1.184026e-08,
"12": 1.082400e-08,
"13": 1.246772e-08,
"14": 1.274188e-08,
"15": 1.260836e-08,
"16": 1.476158e-08,
"17": 1.524101e-08,
"18": 1.368410e-08,
"19": 1.303735e-08,
"20": 1.677201e-08,
"X": 1.18898e-08,
"Y": 0.0,
}


# Citation objects shared by the genome and species definitions in this
# module. Each one carries a default reason; callers below re-tag them with
# `.because(...)` for the specific quantity being cited.

# Source of the genome assembly.
_batra2020 = stdpopsim.Citation(
author="Batra et. al.",
year=2020,
doi="https://doi.org/10.1093/gigascience/giaa134",
reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Source of the recombination rates (also cited below for population size).
_wall2022 = stdpopsim.Citation(
author="Wall et. al.",
year=2022,
doi="https://doi.org/10.1093/gbe/evac040",
reasons={stdpopsim.CiteReason.REC_RATE},
)

# Source of the mutation rate (also cited below for generation time).
_wu2020 = stdpopsim.Citation(
author="Wu et. al.",
year=2020,
doi="https://doi.org/10.1371/journal.pbio.3000838",
reasons={stdpopsim.CiteReason.MUT_RATE},
)

# Build one stdpopsim.Chromosome per entry in the autogenerated genome data.
# Mutation rate from Wu et al. 2020 PLoS Biology;
# recombination rates from Wall et al. 2022 GBE.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # The same rate is used for every chromosome. It is written as
            # a plain literal on purpose: the earlier spelling
            # `5.7 * 10e-9` evaluates to 5.7e-8 (10e-9 == 1e-8), ten times
            # the intended 5.7e-9.
            mutation_rate=5.7e-9,
            # Raises KeyError if genome_data lists a chromosome that has no
            # entry in _recombination_rate.
            recombination_rate=_recombination_rate[name],
        )
    )

# Assemble the genome from the chromosomes built above; the assembly name and
# accession come straight from the autogenerated genome_data module.
_genome = stdpopsim.Genome(
chromosomes=_chromosomes,
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
# One citation per genome-level quantity: assembly, recombination rates and
# mutation rate.
citations=[
_batra2020.because(stdpopsim.CiteReason.ASSEMBLY),
_wall2022.because(stdpopsim.CiteReason.REC_RATE),
_wu2020.because(stdpopsim.CiteReason.MUT_RATE),
],
)

# Species definition for the olive baboon (Papio anubis), combining the
# genome above with species-wide default parameters.
_species = stdpopsim.Species(
id="PapAnu",
ensembl_id="papio_anubis",
name="Papio anubis",
common_name="Olive baboon",
genome=_genome,
generation_time=11, # Generation time from Wu et al., section
# "Inferring split times of humans and baboons"
population_size=335505, # Most recent size from the Wall et al. demographic
# model included in demographic_models.py in this directory
# (335505.4808 there, truncated to an integer here)
citations=[
_wall2022.because(stdpopsim.CiteReason.POP_SIZE),
_wu2020.because(stdpopsim.CiteReason.GEN_TIME),
],
)

# Make the species retrievable via stdpopsim.get_species("PapAnu").
stdpopsim.register_species(_species)
99 changes: 99 additions & 0 deletions tests/test_PapAnu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import pytest

import stdpopsim
from tests import test_species


class TestSpeciesData(test_species.SpeciesTestBase):
    """Checks on the PapAnu (olive baboon) species definition."""

    # Species under test, shared by every method of this class.
    species = stdpopsim.get_species("PapAnu")

    def test_ensembl_id(self):
        """The species carries the expected Ensembl identifier."""
        expected = "papio_anubis"
        assert self.species.ensembl_id == expected

    def test_name(self):
        """The species carries the expected scientific name."""
        expected = "Papio anubis"
        assert self.species.name == expected

    def test_common_name(self):
        """The species carries the expected common name."""
        expected = "Olive baboon"
        assert self.species.common_name == expected

    # QC Tests. These tests are performed by another contributor
    # independently referring to the citations provided in the
    # species definition, filling in the appropriate values
    # and deleting the pytest "skip" annotations.
    @pytest.mark.skip(reason="Population size QC not done yet")
    def test_qc_population_size(self):
        """Placeholder: -1 is to be replaced by the QC'd population size."""
        qc_value = -1
        assert self.species.population_size == qc_value

    @pytest.mark.skip(reason="Generation time QC not done yet")
    def test_qc_generation_time(self):
        """Placeholder: -1 is to be replaced by the QC'd generation time."""
        qc_value = -1
        assert self.species.generation_time == qc_value


class TestGenomeData(test_species.GenomeTestBase):
    """Checks on the PapAnu genome definition.

    Both parametrized tests are skipped until QC is done; every -1 below is
    a placeholder for the independently verified per-chromosome rate.
    """

    # Genome under test, shared by every method of this class.
    genome = stdpopsim.get_species("PapAnu").genome

    @pytest.mark.skip(reason="Recombination rate QC not done yet")
    @pytest.mark.parametrize(
        ("name", "rate"),
        [
            ("1", -1),
            ("2", -1),
            ("3", -1),
            ("4", -1),
            ("5", -1),
            ("6", -1),
            ("7", -1),
            ("8", -1),
            ("9", -1),
            ("10", -1),
            ("11", -1),
            ("12", -1),
            ("13", -1),
            ("14", -1),
            ("15", -1),
            ("16", -1),
            ("17", -1),
            ("18", -1),
            ("19", -1),
            ("20", -1),
            ("X", -1),
            ("Y", -1),
        ],
    )
    def test_recombination_rate(self, name, rate):
        """Compare the QC'd recombination rate with the catalog value."""
        chrom = self.genome.get_chromosome(name)
        assert rate == pytest.approx(chrom.recombination_rate)

    @pytest.mark.skip(reason="Mutation rate QC not done yet")
    @pytest.mark.parametrize(
        ("name", "rate"),
        [
            ("1", -1),
            ("2", -1),
            ("3", -1),
            ("4", -1),
            ("5", -1),
            ("6", -1),
            ("7", -1),
            ("8", -1),
            ("9", -1),
            ("10", -1),
            ("11", -1),
            ("12", -1),
            ("13", -1),
            ("14", -1),
            ("15", -1),
            ("16", -1),
            ("17", -1),
            ("18", -1),
            ("19", -1),
            ("20", -1),
            ("X", -1),
            ("Y", -1),
        ],
    )
    def test_mutation_rate(self, name, rate):
        """Compare the QC'd mutation rate with the catalog value."""
        chrom = self.genome.get_chromosome(name)
        assert rate == pytest.approx(chrom.mutation_rate)

0 comments on commit 5423db6

Please sign in to comment.