Skip to content

Commit

Permalink
Changing to implicates = 1 in prepare() to avoid potential over-fitting issues
Browse files Browse the repository at this point in the history
  • Loading branch information
kar01123 committed Feb 12, 2024
1 parent 02b567e commit a5dc917
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 13 deletions.
10 changes: 5 additions & 5 deletions fusion/AHS/2019/2019/input/AHS_2019_2019_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ library(fusionData)
library(fusionModel)

# Number of cores to use
ncores <- 1
ncores <- 3

# Donor and recipient survey identifiers
donor <- "AHS_2019"
Expand All @@ -19,7 +19,7 @@ dir <- paste("fusion", sub("_", "/", donor), acs.vintage, "input", sep = "/")
prep <- prepare(donor = donor,
recipient = recipient,
respondent = "household",
implicates = 5)
implicates = 1)

# Specify fusion variables to be retained in harmonization results
# Removed pca for prep
Expand Down Expand Up @@ -73,14 +73,14 @@ saveRDS(prep, file = file.path(dir, paste(donor, acs.vintage, "prep.rds", sep =
pred.vars <- attr(prep, "xpredictors")

# Set cores for 'fst' to use when writing to disk
threads_fst(ncores)
fst::threads_fst(ncores)

# Save training data to disk
data[[1]] %>%
select(one_of(c("weight", unlist(prep$y), pred.vars))) %>%
write_fst(path = file.path(dir, paste(donor, acs.vintage, "train.fst", sep = "_")), compress = 100)
fst::write_fst(path = file.path(dir, paste(donor, acs.vintage, "train.fst", sep = "_")), compress = 100)

# Save prediction data to disk
data[[2]] %>%
select(one_of(pred.vars)) %>%
write_fst(path = file.path(dir, paste(donor, acs.vintage, "predict.fst", sep = "_")), compress = 100)
fst::write_fst(path = file.path(dir, paste(donor, acs.vintage, "predict.fst", sep = "_")), compress = 100)
4 changes: 2 additions & 2 deletions fusion/CEI/2015-2019/2019/input/CEI_2015-2019_2019_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ library(fusionData)
library(fusionModel)
library(tidyverse)
library(fst)

library(data.table)


# Number of cores to use
Expand Down Expand Up @@ -34,7 +34,7 @@ fusion.vars <- googlesheets4::read_sheet("13GRKkVZXapHtP7oK1WUh0Yu7OQ_9icd17wUGu
prep <- prepare(donor = "CEI_2015-2019",
recipient = "ACS_2019",
respondent = "household",
implicates = 5)
implicates = 1)

data <- assemble(prep,
fusion.variables = fusion.vars,
Expand Down
2 changes: 1 addition & 1 deletion fusion/NHTS/2017/2015/input/NHTS_2017_2015_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dir <- paste("fusion", sub("_", "/", donor), acs.vintage, "input", sep = "/")
prep <- prepare(donor = donor,
recipient = recipient,
respondent = "household",
implicates = 5)
implicates = 1)

# Specify fusion variables to be retained in harmonization results
data <- assemble(prep,
Expand Down
8 changes: 5 additions & 3 deletions fusion/NHTS/2017/2017/input/NHTS_2017_2017_input.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
library(fusionData)
library(fusionModel)

library(fst)

# Number of cores to use
ncores <- 3

Expand All @@ -19,11 +20,12 @@ dir <- paste("fusion", sub("_", "/", donor), acs.vintage, "input", sep = "/")
prep <- prepare(donor = donor,
recipient = recipient,
respondent = "household",
implicates = 5)
implicates = 1)

# Specify fusion variables to be retained in harmonization results
data <- assemble(prep,
fusion.variables = c('bike2save','walk2save','price','place','gstotcst','ptrans'),
fusion.variables = c('bike2save','walk2save',
'price','place','gstotcst','ptrans'),
window = 2)

rm(prep)
Expand Down
2 changes: 1 addition & 1 deletion fusion/RECS/2015/2015/input/RECS_2015_2015_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dir <- paste("fusion", sub("_", "/", donor), acs.vintage, "input", sep = "/")
prep <- prepare(donor = donor,
recipient = recipient,
respondent = "household",
implicates = 5)
implicates = 1)

# Specify fusion variables to be retained in harmonization results
# Removed pca for prep
Expand Down
2 changes: 1 addition & 1 deletion fusion/RECS/2015/2015/output/RECS_2015_2015_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dir <- "fusion/RECS/2015/2015/input"
ncores <- 1

# Number of implicates to generate
nimps <- 30
nimps <- 40

# Output files path stub
out.path <- file.path(sub("input", "output", dir), sub("train.fst$", "", list.files(dir, "train\\.fst$")))
Expand Down

0 comments on commit a5dc917

Please sign in to comment.