Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bos taurus #600

Merged
merged 2 commits into from
Oct 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions stdpopsim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# We import these here to build the catalog, but the internal functions
# defined are not part of the external API.
from .catalog import AraTha # NOQA
from .catalog import BosTau # NOQA
from .catalog import CanFam # NOQA
from .catalog import DroMel # NOQA
from .catalog import EscCol # NOQA
Expand Down
190 changes: 190 additions & 0 deletions stdpopsim/catalog/BosTau/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
"""
Catalog definitions for Bos Taurus
"""
import collections

import msprime

import stdpopsim
from . import genome_data

###########################################################
#
# Genome definition
#
###########################################################

# Assembly citation.
# Title: De novo assembly of the cattle reference genome with
# single-molecule sequencing.
_RosenEtAl = stdpopsim.Citation(
    author="Rosen et al.",
    year="2020",
    doi="https://doi.org/10.1093/gigascience/giaa021",
)

# Mutation rate citation (BioRxiv preprint).
# Title: Frequency of mosaicism points towards mutation-prone early cleavage
# cell divisions in cattle.
_HarlandEtAl = stdpopsim.Citation(
    author="Harland et al.",
    year="2017",
    doi="https://doi.org/10.1101/079863",
)

# Recombination rate citation.
# Title: Cattle Sex-Specific Recombination and Genetic Control from a
# Large Pedigree Analysis.
_MaEtAl = stdpopsim.Citation(
    author="Ma et al.",
    year="2015",
    doi="https://doi.org/10.1371/journal.pgen.1005387",
)

# Generation time, population size and demographic model citation.
# Title: Inferring Demography from Runs of Homozygosity in Whole-Genome
# Sequence, with Correction for Sequence Errors.
_MacLeodEtAl = stdpopsim.Citation(
    author="MacLeod et al.",
    year="2013",
    doi="https://doi.org/10.1093/molbev/mst125",
)

# Genome-wide recombination rate, derived from the dairy-cattle crossover
# counts of Ma et al. (2015): 25.5 crossovers per meiosis in males and 23.2
# in females, i.e. a sex-averaged 24.35 crossovers per meiosis. Dividing by
# 2628394923 bp (the summed lengths of chromosomes 1-29 and X as listed in
# genome_data; MT is not part of that sum) gives
# 24.35 / 2628394923 = 9.26e-9 per bp per generation.
_genome_wide_recombination_rate = 9.26e-9

# Any chromosome without an explicit entry falls back to the genome-wide rate
# on lookup. The mitochondrial genome is the one exception: it is treated as
# non-recombining.
_recombination_rate_data = collections.defaultdict(
    lambda: _genome_wide_recombination_rate,
    {"MT": 0},
)

# One stdpopsim.Chromosome per entry in the generated genome data. Every
# chromosome gets the same mutation rate; the recombination rate is read from
# the per-chromosome lookup table.
_chromosomes = [
    stdpopsim.Chromosome(
        id=chrom_id,
        length=chrom_info["length"],
        synonyms=chrom_info["synonyms"],
        # Sex-averaged estimate per bp per generation (Harland et al., 2017).
        mutation_rate=1.2e-8,
        recombination_rate=_recombination_rate_data[chrom_id],
    )
    for chrom_id, chrom_info in genome_data.data["chromosomes"].items()
]

# Assemble the genome from the chromosome list built above and attach the
# citations backing the mutation rate (Harland et al.), the recombination
# rate (Ma et al.) and the assembly (Rosen et al.).
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    recombination_rate_citations=[_MaEtAl.because(stdpopsim.CiteReason.REC_RATE)],
    assembly_citations=[_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

# Species-level catalog entry. Generation time and population size are both
# cited to MacLeod et al. (2013).
# NOTE(review): generation_time is presumably in years and population_size an
# effective size (62000 is also the oldest-epoch Ne annotated in the
# demographic model below) -- confirm against the stdpopsim.Species docs.
_species = stdpopsim.Species(
    id="BosTau",
    name="Bos Taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    generation_time_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.GEN_TIME)],
    population_size=62000,
    population_size_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.POP_SIZE)],
)

# Register the species with stdpopsim.
stdpopsim.register_species(_species)


###########################################################
#
# Demographic models
#
###########################################################


def _HolsteinFriesan_1M13():
    """
    Build the Holstein-Friesian demographic model of MacLeod et al. (2013).

    The model has a single population. Its size history is encoded as a
    sequence of epochs, each with a per-generation exponential growth rate
    chosen so that the size moves from the Ne at the recent end of the epoch
    to the Ne at its older end.

    :return: The demographic model for the single Holstein-Friesian
        population.
    :rtype: stdpopsim.DemographicModel
    """
    # Named ``model_id`` rather than ``id`` so the builtin is not shadowed.
    model_id = "HolsteinFriesian_1M13"
    description = "Piecewise size changes in Holstein-Friesian cattle."
    # NOTE(review): the text below calls the model piecewise-constant, while
    # the events further down use exponential growth within each epoch --
    # confirm which description is intended.
    long_description = """
        The piecewise-constant population size model of Holstein-Friesian cattle
        from MacLeod et al. 2013. Population sizes were estimated from inferred
        runs of homozygosity, with parameter values taken from Figure 4A by visual
        inspection of the plots.
    """
    populations = [
        stdpopsim.Population(id="Holstein-Friesian", description="Holstein-Friesian"),
    ]
    citations = [_MacLeodEtAl.because(stdpopsim.CiteReason.DEM_MODEL)]

    # 'time' is measured in generations before the present: it says how many
    # generations ago a given Ne (effective population size) and growth rate
    # start to apply.
    # The growth rate is the per-generation exponential growth rate:
    #   -alpha = ln(Ne_recent / Ne_older) / number_of_generations_in_epoch
    # For example, for the epoch between generations 1 and 4:
    #   ln(90/120) / 3 = -0.095894024

    # NOTE(review): the present-day configuration uses growth_rate=0.0166
    # between generations 0 and 1, after which this first event resets the
    # size to 90 -- confirm this is the intended reading of Figure 4A.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=1,
            initial_size=90,
            growth_rate=-0.095894024,
            population_id=0,
        ),  # Ne 90 to 120
    ]

    # (start time in generations ago, growth rate) for every older epoch.
    # These epochs change only the growth rate; the population size carries
    # over from the end of the previous epoch.
    older_epochs = [
        (4, -0.24465639),  # Ne 120 to 250
        (7, -0.0560787),  # Ne 250 to 350
        (13, -0.1749704),  # Ne 350 to 1000
        (19, -0.0675775),  # Ne 1000 to 1500
        (25, -0.0022129),  # Ne 1500 to 2000
        (155, -0.0007438),  # Ne 2000 to 2500
        (455, -0.0016824),  # Ne 2500 to 3500
        (655, -0.0006301),  # Ne 3500 to 7000
        (1755, -0.0005945),  # Ne 7000 to 10000
        (2355, -0.0005306),  # Ne 10000 to 17000
        (3355, -0.0000434),  # Ne 17000 to 62000
        (33155, -0.0000),  # Ne 62000 (model has "coalesced")
        (933155, -0.0),
    ]
    demographic_events.extend(
        msprime.PopulationParametersChange(
            time=start_time, growth_rate=rate, population_id=0
        )
        for start_time, rate in older_epochs
    )

    return stdpopsim.DemographicModel(
        id=model_id,
        description=description,
        long_description=long_description,
        populations=populations,
        citations=citations,
        generation_time=_species.generation_time,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=90, growth_rate=0.0166, metadata=populations[0].asdict()
            )
        ],
        demographic_events=demographic_events,
    )


_species.add_demographic_model(_HolsteinFriesan_1M13())
38 changes: 38 additions & 0 deletions stdpopsim/catalog/BosTau/genome_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# File autogenerated from Ensembl REST API. Do not edit.
#
# Assembly identifiers plus one record per chromosome, keyed by chromosome
# name. Each "length" is in base pairs; chromosomes 1-29 and X sum to
# 2628394923 bp (MT is not part of that sum). No synonyms are defined.
data = {
    "assembly_accession": "GCA_002263795.2",
    "assembly_name": "ARS-UCD1.2",
    "chromosomes": {
        "1": {"length": 158534110, "synonyms": []},
        "2": {"length": 136231102, "synonyms": []},
        "3": {"length": 121005158, "synonyms": []},
        "4": {"length": 120000601, "synonyms": []},
        "5": {"length": 120089316, "synonyms": []},
        "6": {"length": 117806340, "synonyms": []},
        "7": {"length": 110682743, "synonyms": []},
        "8": {"length": 113319770, "synonyms": []},
        "9": {"length": 105454467, "synonyms": []},
        "10": {"length": 103308737, "synonyms": []},
        "11": {"length": 106982474, "synonyms": []},
        "12": {"length": 87216183, "synonyms": []},
        "13": {"length": 83472345, "synonyms": []},
        "14": {"length": 82403003, "synonyms": []},
        "15": {"length": 85007780, "synonyms": []},
        "16": {"length": 81013979, "synonyms": []},
        "17": {"length": 73167244, "synonyms": []},
        "18": {"length": 65820629, "synonyms": []},
        "19": {"length": 63449741, "synonyms": []},
        "20": {"length": 71974595, "synonyms": []},
        "21": {"length": 69862954, "synonyms": []},
        "22": {"length": 60773035, "synonyms": []},
        "23": {"length": 52498615, "synonyms": []},
        "24": {"length": 62317253, "synonyms": []},
        "25": {"length": 42350435, "synonyms": []},
        "26": {"length": 51992305, "synonyms": []},
        "27": {"length": 45612108, "synonyms": []},
        "28": {"length": 45940150, "synonyms": []},
        "29": {"length": 51098607, "synonyms": []},
        "X": {"length": 139009144, "synonyms": []},
        "MT": {"length": 16338, "synonyms": []},
    },
}
3 changes: 0 additions & 3 deletions stdpopsim/catalog/HomSap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@
Catalog definitions for Homo Sapiens
"""
import math
import logging

import msprime

import stdpopsim
from . import genome_data

logger = logging.getLogger(__name__)

###########################################################
#
# Genome definition
Expand Down
4 changes: 0 additions & 4 deletions stdpopsim/catalog/PonAbe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@
import stdpopsim
from . import genome_data

import logging

logger = logging.getLogger(__name__)

###########################################################
#
# Genome definition
Expand Down