-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: HIV transmission for short term partners #70
Changes from all commits
37a88ce
e9b8377
32d9cc7
6b549d2
7a92a3e
849a361
4bf3863
2d464a3
369556e
a62a146
1d48503
a9ed103
fcb5e3d
64bf5c4
a485687
d6467df
bfd0fe9
40ce037
ed8a539
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,17 +1,34 @@ | ||||||
import operator | ||||||
|
||||||
import numpy as np | ||||||
import pandas as pd | ||||||
|
||||||
import hivpy.column_names as col | ||||||
|
||||||
from .common import rng | ||||||
from .sexual_behaviour import selector | ||||||
from .common import SexType, opposite_sex, rng | ||||||
|
||||||
|
||||||
class HIVStatusModule: | ||||||
initial_hiv_newp_threshold = 7 # lower limit for HIV infection at start of epidemic | ||||||
initial_hiv_prob = 0.8 # for those with enough partners at start of epidemic | ||||||
|
||||||
def __init__(self):
    """Create the partner risk tables and draw the transmission parameters.

    Several parameters are sampled from ``rng`` here, once per module
    instance. NOTE(review): presumably ``rng`` is a shared sequential
    generator, in which case reordering the draws below would change which
    values each parameter receives -- confirm before rearranging.
    """
    # Per sex: one entry per age group (5 is hard-coded), overwritten by
    # update_partner_risk_vectors.
    self.stp_HIV_rate = {SexType.Male: np.zeros(5),
                         SexType.Female: np.zeros(5)}  # FIXME
    # Per sex: 5 rows (age groups) x 7 columns (viral load groups), also
    # overwritten by update_partner_risk_vectors.
    self.stp_viral_group_rate = {SexType.Male: np.array([np.zeros(7)]*5),
                                 SexType.Female: np.array([np.zeros(7)]*5)}
    # FIXME move these to data file
    # a more descriptive name would be nice
    # Overall multiplier applied to every entry of transmission_means.
    self.fold_tr_newp = rng.choice(
        [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1/0.8, 1/0.6, 1/0.4])
    # Multiplier applied to the transmission probability of females aged 20+.
    self.fold_change_w = rng.choice([1., 1.5, 2.], p=[0.05, 0.25, 0.7])
    # Multiplier for females under 20; built on top of fold_change_w.
    self.fold_change_yw = rng.choice([1., 2., 3.]) * self.fold_change_w
    # Not read anywhere in the visible code (see the STI TODO in
    # stp_HIV_transmission).
    self.fold_change_sti = rng.choice([2., 3.])
    self.tr_rate_primary = 0.16
    self.tr_rate_undetectable_vl = rng.choice([0.0000, 0.0001, 0.0010], p=[0.7, 0.2, 0.1])
    # Mean transmission rate for each of the 7 viral load groups (indexed by
    # group number), already scaled by fold_tr_newp.
    self.transmission_means = self.fold_tr_newp * \
        np.array([0, self.tr_rate_undetectable_vl, 0.01, 0.03, 0.06, 0.1, self.tr_rate_primary])
    # Spread for each viral load group, passed as the second argument of
    # rng.normal by the transmission methods.
    # NOTE(review): these values are squared (variance-like) although the
    # name says "sigmas"; if rng.normal expects a standard deviation the
    # spread is far smaller than the unsquared numbers suggest -- confirm.
    self.transmission_sigmas = np.array(
        [0, 0.000025**2, 0.0025**2, 0.0075**2, 0.015**2, 0.025**2, 0.075**2])
|
||||||
def initial_HIV_status(self, population: pd.DataFrame): | ||||||
"""Initialise HIV status at the start of the simulation to no infections.""" | ||||||
# This may be useful as a separate method if we end up representing status | ||||||
|
@@ -29,17 +46,88 @@ def introduce_HIV(self, population: pd.DataFrame): | |||||
hiv_status.loc[initial_candidates] = initial_infection | ||||||
return hiv_status | ||||||
|
||||||
def update_HIV_status(self, population: pd.DataFrame): | ||||||
def update_partner_risk_vectors(self, population):
    """Recalculate the partner risk tables for every sex and age group.

    Within each (sex, age group) people are weighted by their number of
    short term partners, so someone with more partners contributes more:

    * ``stp_HIV_rate[sex][age_group]`` is set to the share of the group's
      partnerships that belong to HIV positive people.
    * ``stp_viral_group_rate[sex][age_group]`` is set to the share of the
      group's partnerships held by people in each of the 7 viral load groups.

    A group with no partnerships gets an HIV rate of 0 and is assigned
    entirely to viral load group 0.

    Parameters
    ----------
    population
        Object exposing the population table as ``population.data``; the
        table must contain the sex, age group, partner count, HIV status and
        viral load group columns.
    """
    data = population.data
    # Should we be using for loops here or can we do better?
    for sex in SexType:
        # The sex mask does not depend on the age group, so build it once.
        is_sex = data[col.SEX] == sex
        for age_group in range(5):  # FIXME need to get number of age groups from somewhere
            sub_pop = data.loc[is_sex & (data[col.SEX_MIX_AGE_GROUP] == age_group)]
            partners = sub_pop[col.NUM_PARTNERS]
            # total number of people partnered to people in this group
            # (Series.sum is vectorised; the builtin sum iterates in Python)
            n_stp_total = partners.sum()
            # number of people partnered to HIV+ people in this group
            n_stp_of_infected = sub_pop.loc[sub_pop[col.HIV_STATUS], col.NUM_PARTNERS].sum()
            # Probability of a partner from this group being HIV positive;
            # the zero check also avoids dividing by an empty total.
            if n_stp_of_infected == 0:
                self.stp_HIV_rate[sex][age_group] = 0
            else:
                # TODO: need to double check this definition
                self.stp_HIV_rate[sex][age_group] = n_stp_of_infected / n_stp_total
            # Chances of a partner from this group being in a given viral group
            if n_stp_total > 0:
                viral_groups = sub_pop[col.VIRAL_LOAD_GROUP]
                self.stp_viral_group_rate[sex][age_group] = [
                    partners[viral_groups == vg].sum() / n_stp_total for vg in range(7)]
            else:
                self.stp_viral_group_rate[sex][age_group] = np.array([1, 0, 0, 0, 0, 0, 0])
|
||||||
def set_dummy_viral_load(self, population):
    """Placeholder: fill the viral load group column with random groups.

    Stands in for a proper viral load model until that part of the code
    exists. The values come from ``rng.choice(7, population.size)``.
    """
    random_groups = rng.choice(7, population.size)
    population.data[col.VIRAL_LOAD_GROUP] = random_groups
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add this variable to #48? Or make an issue somewhere for properly implementing it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I very much forgot #48 existed, so yes I will add this to it. |
||||||
|
||||||
def get_infection_prob(self, sex, age, n_partners, stp_age_groups):
    """Return the probability of infection from each short term partner.

    For every partner a viral load group is sampled from the partner
    group's distribution, a transmission rate is drawn for that viral
    group (negative draws are clipped to 0), and the rate is multiplied by
    the HIV prevalence among partners of that sex and age group.

    Parameters
    ----------
    sex
        Sex of the person at risk; partners are taken to be of the
        opposite sex.
    age
        Age of the person at risk; only used to pick the fold change
        applied to females (under 20 vs. 20 and over).
    n_partners
        Number of short term partners to evaluate.
    stp_age_groups
        Indexable collection holding the age group of each partner; the
        first ``n_partners`` entries are read.

    Returns
    -------
    numpy.ndarray
        Array of length ``n_partners`` with one infection probability per
        partner.
    """
    # Slow example that avoids repeating the iteration over partners three
    # times by putting everything in one for loop, but for loops in python
    # will be slow.
    target_sex = opposite_sex(sex)
    infection_prob = np.zeros(n_partners)
    for i in range(n_partners):
        partner_age_group = stp_age_groups[i]
        stp_viral_group = rng.choice(
            7, p=self.stp_viral_group_rate[target_sex][partner_age_group])
        # Prevalence must be looked up for the partner's sex, matching the
        # viral group lookup above and stp_HIV_transmission. The previous
        # opposite_sex(target_sex) resolved back to the person's own sex.
        HIV_probability = self.stp_HIV_rate[target_sex][partner_age_group]
        # NOTE(review): transmission_sigmas holds squared values but is
        # used here as the second argument of rng.normal -- confirm intent.
        infection_prob[i] = HIV_probability * max(0, rng.normal(
            self.transmission_means[stp_viral_group],
            self.transmission_sigmas[stp_viral_group]))
    # The fold change depends only on the person, so apply it once.
    if sex == SexType.Female:
        infection_prob *= self.fold_change_yw if age < 20 else self.fold_change_w
    return infection_prob
|
||||||
def stp_HIV_transmission(self, person):
    """Decide whether a person is infected by any of their short term partners.

    ``person`` is a row-like object indexed by column name. Returns True
    if HIV transmission occurs, and False otherwise.
    """
    # TODO: Add circumcision, STIs etc.
    partner_sex = opposite_sex(person[col.SEX])
    partner_age_groups = person[col.STP_AGE_GROUPS]
    group_rates = self.stp_viral_group_rate[partner_sex]
    prevalence = self.stp_HIV_rate[partner_sex]

    # Random draws happen in the same order as before: first every viral
    # group, then every transmission rate, then the final uniform number.
    sampled_groups = [rng.choice(7, p=group_rates[age_group])
                      for age_group in partner_age_groups]
    partner_positive = np.array([prevalence[age_group]
                                 for age_group in partner_age_groups])
    transmission = np.array([
        max(0, rng.normal(self.transmission_means[vg], self.transmission_sigmas[vg]))
        for vg in sampled_groups])

    if person[col.SEX] is SexType.Female:
        # Younger women get the larger fold change.
        fold = self.fold_change_yw if person[col.AGE] < 20 else self.fold_change_w
        transmission = transmission * fold

    # Chance that none of the partners transmits.
    prob_uninfected = np.prod(1 - (partner_positive * transmission))
    return rng.random() > prob_uninfected
|
||||||
def update_HIV_status(self, population): | ||||||
"""Update HIV status for new transmissions in the last time period.\\ | ||||||
Super simple model where probability of being infected by a given person | ||||||
is prevalence times transmission risk (P x r).\\ | ||||||
Probability of each new partner not infecting you then is (1-Pr)\\ | ||||||
Then prob of n partners independently not infecting you is (1-Pr)**n\\ | ||||||
So probability of infection is 1-((1-Pr)**n)""" | ||||||
HIV_neg_idx = selector(population, HIV_status=(operator.eq, False)) | ||||||
rands = rng.uniform(0.0, 1.0, sum(HIV_neg_idx)) | ||||||
HIV_prevalence = sum(population[col.HIV_STATUS])/len(population) | ||||||
HIV_infection_risk = 0.2 # made up, based loosely on transmission probabilities | ||||||
n_partners = population.loc[HIV_neg_idx, col.NUM_PARTNERS] | ||||||
HIV_prob = 1-((1-HIV_prevalence*HIV_infection_risk)**n_partners) | ||||||
population.loc[HIV_neg_idx, col.HIV_STATUS] = (rands <= HIV_prob) | ||||||
self.update_partner_risk_vectors(population) | ||||||
HIV_neg_idx = population.data.index[(~population.data[col.HIV_STATUS]) & ( | ||||||
population.data[col.NUM_PARTNERS] > 0)] | ||||||
sub_pop = population.data.loc[HIV_neg_idx] | ||||||
population.data.loc[HIV_neg_idx, col.HIV_STATUS] = sub_pop.apply( | ||||||
self.stp_HIV_transmission, axis=1) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import pytest | ||
|
||
import hivpy.column_names as col | ||
from hivpy.common import SexType | ||
from hivpy.hiv_status import HIVStatusModule | ||
from hivpy.population import Population | ||
from hivpy.sexual_behaviour import selector | ||
|
@@ -90,7 +91,7 @@ def test_hiv_update(pop_with_initial_hiv): | |
data = pop_with_initial_hiv.data | ||
prev_status = data["HIV_status"].copy() | ||
for i in range(10): | ||
pop_with_initial_hiv.hiv_status.update_HIV_status(pop_with_initial_hiv.data) | ||
pop_with_initial_hiv.hiv_status.update_HIV_status(pop_with_initial_hiv) | ||
|
||
new_cases = data["HIV_status"] & (~ prev_status) | ||
print("Num new HIV+ = ", sum(new_cases)) | ||
|
@@ -99,3 +100,94 @@ def test_hiv_update(pop_with_initial_hiv): | |
assert not any(miracles) | ||
assert any(new_cases) | ||
assert not any(under_15s_idx) | ||
|
||
|
||
def test_HIV_risk_vector(): | ||
N = 10000 | ||
pop = Population(size=N, start_date=date(1989, 1, 1)) | ||
hiv_module = pop.hiv_status | ||
# Test probability of partnering with someone with HIV by sex and age group | ||
# 5 age groups (15-25, 25-35, 35-45, 45-55, 55-65) and 2 sexes = 10 groups | ||
N_group = N // 10 # number of people we will put in each group | ||
sex_list = [] | ||
age_group_list = [] | ||
HIV_list = [] | ||
HIV_ratio = 10 # mark 1 in 10 people as HIV positive | ||
for sex in SexType: | ||
for age_group in range(5): | ||
sex_list += [sex] * N_group | ||
age_group_list += [age_group] * N_group | ||
HIV_list += [True] * (N_group // HIV_ratio) + [False] * (N_group - N_group//HIV_ratio) | ||
pop.data[col.SEX] = np.array(sex_list) | ||
pop.data[col.SEX_MIX_AGE_GROUP] = np.array(age_group_list) | ||
pop.data[col.HIV_STATUS] = np.array(HIV_list) | ||
pop.data[col.NUM_PARTNERS] = 1 # give everyone a single stp to start with | ||
|
||
# if everyone has the same number of partners, | ||
# probability of being with someone with HIV should be = HIV prevalence | ||
hiv_module.update_partner_risk_vectors(pop) | ||
expectation = np.array([0.1]*5) | ||
assert np.allclose(hiv_module.stp_HIV_rate[SexType.Male], expectation) | ||
assert np.allclose(hiv_module.stp_HIV_rate[SexType.Female], expectation) | ||
|
||
# Check for differences in male and female rate correctly | ||
# change HIV rate in men to double | ||
males = pop.data.index[pop.data[col.SEX] == SexType.Male] | ||
# transform group fails when only grouped by one field | ||
# appears to change the type of the object passed to the function! | ||
Comment on lines
+136
to
+137
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noting that this should be fixed as part of #71, so we can update the following lines when merged. |
||
male_HIV_status = pop.transform_group([col.SEX_MIX_AGE_GROUP, col.SEX], lambda x, y: np.array( | ||
[True] * (2 * N_group // HIV_ratio) + | ||
[False] * (N_group - 2*N_group // HIV_ratio)), False, males) | ||
Comment on lines
+138
to
+140
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Although I'm confused by what the (lambda) function does here: neither argument is used? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just because the function passed to transform group has to match the number of arguments to the variables grouped by, which is something we can potentially change by making transform_group more flexible |
||
pop.data.loc[males, col.HIV_STATUS] = male_HIV_status | ||
hiv_module.update_partner_risk_vectors(pop) | ||
assert np.allclose(hiv_module.stp_HIV_rate[SexType.Male], 2*expectation) | ||
assert np.allclose(hiv_module.stp_HIV_rate[SexType.Female], expectation) | ||
|
||
# Check for difference when changing number of partners between HIV + / - people | ||
HIV_positive = pop.data.index[pop.data[col.HIV_STATUS]] | ||
# 2 partners for each HIV+ person, one for each HIV- person. | ||
pop.data.loc[HIV_positive, col.NUM_PARTNERS] = 2 | ||
expectation_male = (2 * 0.2) / (2*0.2 + 0.8) | ||
expectation_female = (2 * 0.1) / (2*0.1 + 0.9) | ||
hiv_module.update_partner_risk_vectors(pop) | ||
assert np.allclose(hiv_module.stp_HIV_rate[SexType.Male], expectation_male) | ||
assert np.allclose(hiv_module.stp_HIV_rate[SexType.Female], expectation_female) | ||
|
||
|
||
def test_viral_group_risk_vector():
    """Check the partner-weighted viral load group rates per sex and age group."""
    N = 10000
    pop = Population(size=N, start_date=date(1989, 1, 1))
    hiv_module = pop.hiv_status
    # Test probability of partnering with someone in each viral load group,
    # by sex and age group
    # 5 age groups (15-25, 25-35, 35-45, 45-55, 55-65) and 2 sexes = 10 groups
    N_group = N // 10  # number of people we will put in each group
    sex_list = []
    age_group_list = []
    for sex in SexType:
        for age_group in range(5):
            sex_list += [sex] * N_group
            age_group_list += [age_group] * N_group
    pop.data[col.SEX] = np.array(sex_list)
    pop.data[col.SEX_MIX_AGE_GROUP] = np.array(age_group_list)
    pop.data[col.NUM_PARTNERS] = 1  # give everyone a single stp to start with
    pop.data[col.VIRAL_LOAD_GROUP] = 1  # put everyone in the same viral load group to begin with
    hiv_module.update_partner_risk_vectors(pop)  # probability of group 1 should be 100%
    expectation = np.array([0., 1., 0., 0., 0., 0., 0.])
    assert np.allclose(hiv_module.stp_viral_group_rate[SexType.Male], expectation)
    assert np.allclose(hiv_module.stp_viral_group_rate[SexType.Female], expectation)

    # alternate groups 1 & 2, and give group 1 twice as many partners
    pop.data[col.VIRAL_LOAD_GROUP] = np.array([1, 2] * (N // 2))
    pop.data.loc[pop.data[col.VIRAL_LOAD_GROUP] == 1, col.NUM_PARTNERS] = 2
    hiv_module.update_partner_risk_vectors(pop)
    expectation = np.array([0., 2/3, 1/3, 0., 0., 0., 0.])
    assert np.allclose(hiv_module.stp_viral_group_rate[SexType.Male], expectation)
    assert np.allclose(hiv_module.stp_viral_group_rate[SexType.Female], expectation)

    # check for appropriate sex differences: move group 1 females to group 3
    pop.data.loc[(pop.data[col.VIRAL_LOAD_GROUP] == 1) & (
        pop.data[col.SEX] == SexType.Female), col.VIRAL_LOAD_GROUP] = 3
    hiv_module.update_partner_risk_vectors(pop)
    expectation_female = np.array([0., 0., 1/3, 2/3, 0., 0., 0.])
    assert np.allclose(hiv_module.stp_viral_group_rate[SexType.Male], expectation)
    assert np.allclose(hiv_module.stp_viral_group_rate[SexType.Female], expectation_female)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Personally I would prefer to keep the abstraction here, so something like
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, but that doesn't work if called on a whole column, does it? Hm. There's probably a way to vectorise this while preserving the abstraction but I guess it's not crucial.