-
Notifications
You must be signed in to change notification settings - Fork 0
/
2_scramble.R
47 lines (43 loc) · 2.75 KB
/
2_scramble.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 2_scramble.R
# scramble times for initial analysis
# July 2019
set.seed(1234) # fixed seed so that the scrambled values are reproducible
## scramble data using probability distributions
# a) reviewers
# Every timing variable is replaced by an independent random draw, so the
# flags (holiday, late.night, weekend) need not agree with the scrambled
# date and hour. The draws are made in the order local.date, holiday,
# late.night, weekend, local.hour; re-ordering them changes the values
# produced under the seed above.
reviewer = mutate(reviewer,
    local.date = as.Date('2012-01-02') + round(runif(nrow(reviewer), min=0, max=365.25*7)), # scramble dates to avoid trends
    holiday = factor(rbinom(n=nrow(reviewer), size=1, prob=0.02), levels=0:1, labels=c('No','Yes')), # rarer
    late.night = factor(rbinom(n=nrow(reviewer), size=1, prob=0.4), levels=0:1, labels=c('No','Yes')),
    weekend = factor(rbinom(n=nrow(reviewer), size=1, prob=1/7), levels=0:1, labels=c('Weekday', 'Weekend')),
    local.hour = runif(n=nrow(reviewer), min=0, max=23.99)) %>%
  select(-window) # remove window
## re-calculate windows
# One row per calendar day between the earliest and latest scrambled date.
windows = data.frame(local.date = seq(min(reviewer$local.date), max(reviewer$local.date), by=1)) %>% # range of observed dates
  # wday runs 0-6 with Sunday = 0, so 1 is Monday. Unlike weekdays(), which
  # returns day names in the current locale, this works in any locale.
  mutate(monday = as.POSIXlt(local.date)$wday == 1L, # new windows start on Monday
         window = cumsum(monday)) %>%
  # window 0 holds the days before the first Monday; the highest-numbered
  # window is always dropped, even if the last date happens to be a Sunday
  filter(window > 0, window < max(window)) %>% # remove first and last windows that do not contain full weeks
  select(-monday) # no longer needed
# Attach the new window to every row; rows dated outside the kept windows
# have no match in `windows` and are therefore dropped by the join.
reviewer = left_join(windows, reviewer, by='local.date') %>%
  filter(!is.na(journal)) # remove days with no reviewer submissions (journal is NA after the join)
# b) authors
# Every timing variable is replaced by an independent random draw, so the
# flags (holiday, late.night, weekend) need not agree with the scrambled
# date and hour. The draws are made in the order local.date, holiday,
# late.night, weekend, local.hour; re-ordering them changes the values
# produced by the random number stream.
submission = mutate(submission,
    local.date = as.Date('2012-01-02') + round(runif(nrow(submission), min=0, max=365.25*7)), # scramble dates to avoid trends
    holiday = factor(rbinom(n=nrow(submission), size=1, prob=0.02), levels=0:1, labels=c('No','Yes')), # rarer
    late.night = factor(rbinom(n=nrow(submission), size=1, prob=0.4), levels=0:1, labels=c('No','Yes')),
    weekend = factor(rbinom(n=nrow(submission), size=1, prob=1/7), levels=0:1, labels=c('Weekday', 'Weekend')),
    local.hour = runif(n=nrow(submission), min=0, max=23.99)) %>%
  select(-window) # remove window
## re-calculate windows
# One row per calendar day between the earliest and latest scrambled date.
windows = data.frame(local.date = seq(min(submission$local.date), max(submission$local.date), by=1)) %>% # range of observed dates
  # wday runs 0-6 with Sunday = 0, so 1 is Monday. Unlike weekdays(), which
  # returns day names in the current locale, this works in any locale.
  mutate(monday = as.POSIXlt(local.date)$wday == 1L, # new windows start on Monday
         window = cumsum(monday)) %>%
  # window 0 holds the days before the first Monday; the highest-numbered
  # window is always dropped, even if the last date happens to be a Sunday
  filter(window > 0, window < max(window)) %>% # remove first and last windows that do not contain full weeks
  select(-monday) # no longer needed
# Attach the new window to every row; rows dated outside the kept windows
# have no match in `windows` and are therefore dropped by the join.
submission = left_join(windows, submission, by='local.date') %>%
  filter(!is.na(journal)) # remove days with no author submissions (journal is NA after the join)