Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
gcsharp committed Jul 9, 2023
1 parent 4658a28 commit c6b2ff0
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 11 deletions.
33 changes: 32 additions & 1 deletion combine_clean_meta_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,44 @@ numeric_cols <-c(c("est","se","p","q","hetp","i2","h2"),
# Coerce every column named in `numeric_cols` to numeric, one column at a time.
# apply() is avoided here: it first squashes the selected columns into a single
# matrix, and when that selection mixes character and numeric columns the
# numeric ones are passed through format(), which rounds them to
# getOption("digits") significant digits before they are parsed back.
combined_cleaned[, numeric_cols] <- lapply(
  combined_cleaned[, numeric_cols, drop = FALSE],
  function(column) {
    # Convert factors via their labels, not their internal integer codes.
    if (is.factor(column)) {
      column <- as.character(column)
    }
    as.numeric(column)
  }
)


# Tidying up: recode zero values of total_n_exposure / total_n_outcome to NA.
combined_cleaned$total_n_exposure[combined_cleaned$total_n_exposure == 0] <- NA
combined_cleaned$total_n_outcome[combined_cleaned$total_n_outcome == 0] <- NA

# Save the full table. The path is relative, so it is resolved against the
# current working directory.
saveRDS(
  combined_cleaned,
  "University of Bristol/grp-EPoCH - Documents/EPoCH GitHub/all_results.rds"
)

# Remove results related to physical activity or diet (for app).
# A logical mask is used instead of `-which(...)`: when no row matches,
# which() returns integer(0) and `df[-integer(0), ]` selects ZERO rows, which
# would silently empty the whole table instead of leaving it untouched.
combined_cleaned <- combined_cleaned[
  !(combined_cleaned$exposure_class %in% c("physical activity", "diet")),
]
# ... and results for perinatal survival outcomes (same masking approach).
combined_cleaned <- combined_cleaned[
  !(combined_cleaned$outcome_class %in% c("perinatal survival")),
]

# QC of cohort results: look for evidence of a model not converging, probably
# because a single explanatory variable (exposure or covariate) uniquely
# identifies the outcome, i.e. perfect prediction/complete separation.
#
# Results are flagged when |est| exceeds the 99th percentile of |est| among
# results with the same outcome type (binary or continuous).
# NOTE(review): the original note describes the evidence as "a very high effect
# estimate combined with a large P-value", but only the size of the estimate is
# checked below -- P-values are not used. Confirm whether that is intended.
#
# This section is diagnostic only: it builds `df_cleaned` and the `lost_*`
# vectors for inspection and never modifies `combined_cleaned`.

df <- combined_cleaned

# Per-outcome-type cutoffs on the absolute effect estimate.
df_bin <- df[df$outcome_type == "binary", ]
cutoff_bin <- quantile(abs(df_bin$est), probs = 0.99, na.rm = TRUE)
df_cont <- df[df$outcome_type == "continuous", ]
cutoff_cont <- quantile(abs(df_cont$est), probs = 0.99, na.rm = TRUE)

# Keep rows at or below the cutoff for their outcome type. which() drops rows
# where the condition is NA (missing est or outcome_type), so those rows, and
# rows of any other outcome type, are not carried into df_cleaned.
df_cleaned <- df[which(
  (df$outcome_type == "binary" & abs(df$est) <= cutoff_bin) |
    (df$outcome_type == "continuous" & abs(df$est) <= cutoff_cont)
), ]

# Exposures / outcomes that no longer appear at all after the filter.
lost_exposures <- unique(
  df$exposure_linker[!(df$exposure_linker %in% df_cleaned$exposure_linker)]
)
lost_outcomes <- unique(
  df$outcome_linker[!(df$outcome_linker %in% df_cleaned$outcome_linker)]
)

# Exposure-outcome pairs that no longer appear after the filter.
original_associations <- paste(df$exposure_linker, df$outcome_linker)
cleaned_associations <- paste(df_cleaned$exposure_linker, df_cleaned$outcome_linker)
lost_associations <- unique(
  original_associations[!(original_associations %in% cleaned_associations)]
)

# Drop dustmite, autism, and insect allergy because very small Ns lead to
# implausible effect estimates and SEs.
# A logical mask is used instead of `-which(...)`, which would select zero rows
# (emptying the table) if none of these subclasses were present.
combined_cleaned <- combined_cleaned[
  !(combined_cleaned$outcome_subclass2 %in%
    c("dustmite allergy", "insect allergy", "autism")),
]

# Drop associations where very large SEs mean the result is unreliable:
# anything at or above the 99.9th percentile of the SE distribution.
# na.rm = TRUE because quantile() stops with an error if any SE is missing.
cutoff <- quantile(combined_cleaned$se, probs = 0.999, na.rm = TRUE)
print(cutoff)
# Rows with a missing SE are kept as they are; a bare `se < cutoff` index would
# turn each of them into a row made up entirely of NAs.
combined_cleaned2 <- combined_cleaned[
  is.na(combined_cleaned$se) | combined_cleaned$se < cutoff,
]

# Save the reduced table. The SE-filtered `combined_cleaned2` is what gets
# written; previously the unfiltered `combined_cleaned` was saved, so the SE
# cutoff computed above never reached the output files.
saveRDS(combined_cleaned2,"~/University of Bristol/grp-EPoCH - Documents/EPoCH GitHub/all_results_reduced.rds")
saveRDS(combined_cleaned2,"/Users/gs8094/Library/CloudStorage/OneDrive-UniversityofExeter/Projects/EPoCH/EPoCH results app/rds/all_results_reduced.rds")
saveRDS(combined_cleaned2,"/Users/gs8094/Library/CloudStorage/OneDrive-UniversityofExeter/Projects/EPoCH/EPoCH results app/data/rds/all_results_reduced.rds")



13 changes: 3 additions & 10 deletions meta_analysis/RUN_META_ANALYSIS.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,11 @@ cohort_phewas <- lapply(1:length(cohorts),function(x){
res <- readRDS(paste0(location_of_phewas_res,cohorts[x],"_",model,"_phewas.rds"))
res <- res[,c("exposure","regression_term","outcome","est","se","p","n","exposure_n","outcome_n")]
res <- res[which(res$regression_term!="error"),] # can remove this once we have sorted the issue with MCS
# if(cohorts[x]=="BIB_ALL"){
# res <- res[-grep("bmi_stage0_zscore|bmi_stage1_zscore|bmi_stage2_zscore|bmi_stage3_zscore|bmi_stage4_zscore",res$outcome),] # can remove this once we have sorted the issue with BIB
# }
# if(cohorts[x]=="MOBA"){
# res <- res[-grep("cbcl|autism|aggression",res$outcome),] # can remove this once we have sorted the issue with MOBA
# res <- res[-grep("phys",res$exposure),] # can remove this once we have sorted the issue with MOBA
# }
res$cohort <- key_cohorts[x]
key <- readRDS(paste0(location_of_key,tolower(key_cohorts[x]),"_key.rds"))
#just tidying up a bit due to (accidental) differences in the make_key process for MoBa and the other cohorts - but actually this column isn't needed for the meta-analysis
key$exposure_source <-"reported by self or study mother"
key$exposure_linker<-str_replace(key$exposure_linker,pattern="self-reported|reported by self or study mother|self-reported or measured","reported by self or study mother")
res <- merge(res,key,by=c("exposure","outcome"),all.y=F)
res$exposure_dose <-NA
res$exposure_dose[grep("Heavy",res$regression_term)]<-"heavy"
Expand All @@ -71,9 +67,6 @@ print("combining cohort results in long format...")

# Stack the list of per-cohort result tables row-wise into one long table.
all_cohort_phewas_long <- bind_rows(cohort_phewas)

# Blank out exposure_source: it differs (accidentally) between the make_key
# process for MoBa and the other cohorts, and the column is not needed for the
# meta-analysis.
all_cohort_phewas_long[["exposure_source"]] <- NA

print("combining cohort results in wide format...")

all_cohort_phewas_wide <- pivot_wider(all_cohort_phewas_long,
Expand Down

0 comments on commit c6b2ff0

Please sign in to comment.