-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_homogeneous_cohort.R
200 lines (157 loc) · 6.61 KB
/
02_homogeneous_cohort.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#* Title: Ex. 02 - Draw age to death from homogeneous cohort
#*
#* Code function:
#* This code corresponds to the second example of the NPS manuscript.
#*
#* It showcases a use of the nps_nhppp method while drawing
#* ages to death from a homogeneous cohort in 2015.
#*
#* The homogeneous cohort is the "Total" population, without disaggregating
#* it by sex.
#*
#* Creation date: February 01 2024
#* Authors:
#* - David U. Garibay-Treviño, M.Sc.
#* - Hawre Jalal, M.D., Ph.D.
#* - Fernando Alarid-Escudero, Ph.D.
# 01 Initial Setup --------------------------------------------------------
## 01.01 Clean environment ------------------------------------------------
#* Drop every non-hidden object of the current workspace so the script
#* starts from a blank slate.
#* NOTE(review): when the script is sourced into a live session this also
#* removes the caller's objects - confirm that this is intended.
rm(list = ls())
#* Run the garbage collector so the memory released above is reclaimed
gc()
## 01.02 Load libraries ----------------------------------------------------
library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(microbenchmark)
# 02 Define general parameters --------------------------------------------
# Show four significant digits when tibbles are printed
options(pillar.sigfig = 4)
# Seed that makes the random draws reproducible
n_seed <- 10242022
# How many ages to death are drawn from the life table
n_samp_life_tables <- 100000
# How many times each expression is repeated during microbenchmarking
n_samp_iter_life_tables <- 100
# 03 Load base data -------------------------------------------------------
#* Yearly all-cause mortality rates for the USA (2000 to 2019), reported for
#* males, females and the total population.
#* Source: The Human Mortality Database
#* https://www.mortality.org/cgi-bin/hmd/country.php?cntr=USA&level=1
#* The file is expected to provide the `all_cause_mortality` object used below;
#* the path is relative to the working directory.
load(file = "data/all_cause_mortality.rda")
# 04 Filter data ----------------------------------------------------------
# Keep only the 2015 records, which feed the homogeneous population example
df_all_cause_mortality_filt <- dplyr::filter(
  tibble::as_tibble(all_cause_mortality),
  Year == 2015
)
# 05 Data wrangling -------------------------------------------------------
#* Survival quantities as defined in Lee & Wang (2013) - Statistical methods
#* for survival data analysis, 4th ed - chapter 2: Functions of survival time
#* Rows are sorted first so that the running sums follow age within each sex.
df_lifetable <- df_all_cause_mortality_filt %>%
  dplyr::arrange(Sex, Year, Age) %>%
  dplyr::group_by(Sex) %>%
  dplyr::mutate(
    # H(t) - Cumulative hazard: running sum of the mortality rates
    H_t = cumsum(Rate),
    # S(t) - Cumulative survival
    S_t = exp(-H_t),
    # F(t) - Cumulative probability of death: 1 - S(t)
    F_t = 1 - S_t,
    # f(t) - Probability assigned to each age row: the increment of F(t),
    # with the first row measured from zero
    p_t = diff(c(0, F_t))
  ) %>%
  dplyr::ungroup()
# Life expectancy per sex, computed as the sum of the survival column
df_le_lifetable <- dplyr::summarise(
  dplyr::group_by(df_lifetable, Sex),
  le = sum(S_t)
)
# Life-table life expectancy of the homogeneous ("Total") cohort
le_lifetable_homog <- df_le_lifetable$le[df_le_lifetable$Sex == "Total"]
# 06 Calculate life expectancy using nps method ---------------------------
# Restrict the life table to the homogeneous ("Total") population
df_lifetable_homog <- dplyr::filter(df_lifetable, Sex == "Total")
# Fix the state of the random number generator so the draws are reproducible.
# The order of the random calls below (sample, then runif) must not change,
# otherwise the seeded results change as well.
set.seed(n_seed)
#* Categorical draw of ages to death, each age weighted by its p_t
v_cat_life_table_homog <- sample(
  x = df_lifetable_homog[["Age"]],
  size = n_samp_life_tables,
  replace = TRUE,
  prob = df_lifetable_homog[["p_t"]]
)
#* One uniform(0, 1) draw per sampled age
v_unif_life_table_homog <- runif(n_samp_life_tables)
#* Continuous-time correction: spread each sampled age within its year
v_cat_life_table_corr_homog <- v_cat_life_table_homog + v_unif_life_table_homog
#* Life expectancy from the raw (integer age) draws
le_homog_uncorr <- mean(v_cat_life_table_homog)
#* Life expectancy from the continuous-time corrected draws
le_homog_corr <- mean(v_cat_life_table_corr_homog)
# Measure execution time, reported in milliseconds.
# The timed expressions are kept exactly as they are so the benchmark labels
# and the work being timed stay the same.
## Categorical sampling only
l_mbench_homog_uncorr <- microbenchmark::microbenchmark(
  sample(x = df_lifetable_homog$Age,
         size = n_samp_life_tables,
         prob = df_lifetable_homog$p_t,
         replace = TRUE),
  times = n_samp_iter_life_tables,
  unit = "ms"
)
## Categorical sampling plus the uniform correction
l_mbench_homog_corr <- microbenchmark::microbenchmark(
  sample(x = df_lifetable_homog$Age,
         size = n_samp_life_tables,
         prob = df_lifetable_homog$p_t,
         replace = TRUE) + runif(n = n_samp_life_tables, min = 0, max = 1),
  times = n_samp_iter_life_tables,
  unit = "ms"
)
# Re-initialise the generator so later code does not depend on n_seed
set.seed(NULL)
# 07 Summarize results ----------------------------------------------------
# One-row table that puts the three life expectancy estimates side by side:
# life table, NPS draws, and NPS draws with the continuous-time correction.
# NOTE(review): the column name `NPS_continous_time` is spelled without the
# second "u"; it is left unchanged here in case other code reads it by name.
df_summary <- tibble::tibble(
  Source = "Life tables - Homogeneous cohort",
  Exact = le_lifetable_homog,
  NPS = le_homog_uncorr,
  NPS_continous_time = le_homog_corr
)
# Check results table
df_summary
# 08 Plotting -------------------------------------------------------------
#* Continuous-time corrected ages to death, arranged for plotting
df_lifetable_samp <- data.frame(
  age_death = v_cat_life_table_corr_homog,
  type = "NPS Homogeneous"
)
#* Font sizes used by the theme elements of the plot
axis_text_size <- axis_title_size <- 14
legend_text_size <- legend_title_size <- 14
title_size <- 14
#* Comparison plot: sampled ages to death against the life-table
#* probabilities (Issues with plot quality)
ggplt_lifetable_comparison_homog <- ggplot(
  data = df_lifetable_samp,
  mapping = aes(x = age_death)
) +
  # Sampled ages: outline-only histogram on the density scale
  geom_histogram(
    mapping = aes(y = after_stat(density), color = "Sample from NPS"),
    # Location of the bins: one-year wide, starting at 0, of the form [a, a + 1)
    binwidth = 1,
    boundary = 0,
    closed = "left",
    position = "identity",
    fill = NA,
    alpha = 0.5
  ) +
  # Life table: one horizontal segment per age, at height p_t
  geom_segment(
    data = df_lifetable_homog,
    mapping = aes(
      x = Age, xend = Age + 1,
      y = p_t, yend = p_t,
      color = "Lifetables"
    ),
    linewidth = 1
  ) +
  scale_color_manual(
    name = "Type",
    values = c("Sample from NPS" = "skyblue", "Lifetables" = "black")
  ) +
  scale_x_continuous(
    breaks = seq(0, 100, by = 10),
    minor_breaks = seq(0, 100, by = 5)
  ) +
  labs(x = "Age", y = "Probability of death") +
  # theme_bw() goes first so the adjustments in theme() are applied on top
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = legend_title_size),
    legend.text = element_text(size = legend_text_size),
    axis.title = element_text(size = axis_title_size),
    axis.text = element_text(size = axis_text_size),
    plot.title = element_text(size = title_size - 4, hjust = 0.5),
    plot.subtitle = element_text(size = title_size - 6, hjust = 0.5),
    plot.caption = element_text(size = title_size - 8)
  )