update

ammegandchips · Jul 9, 2023 · c6b2ff0 · c6b2ff0
1 parent 4658a28
commit c6b2ff0
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 11 deletions.
diff --git a/combine_clean_meta_results.R b/combine_clean_meta_results.R
@@ -66,13 +66,44 @@ numeric_cols <-c(c("est","se","p","q","hetp","i2","h2"),
 combined_cleaned[,numeric_cols] <- apply(combined_cleaned[,numeric_cols],2,as.numeric)
 
 
+# Tidy up: recode zero exposure/outcome totals as missing (NA).
+combined_cleaned$total_n_exposure[which(combined_cleaned$total_n_exposure==0)] <- NA
+combined_cleaned$total_n_outcome[which(combined_cleaned$total_n_outcome==0)] <- NA
+
 # save
 saveRDS(combined_cleaned,"University of Bristol/grp-EPoCH - Documents/EPoCH GitHub/all_results.rds")
 
 # remove results related to physical activity or diet (for app)
 combined_cleaned <- combined_cleaned[-which(combined_cleaned$exposure_class %in% c("physical activity","diet")),]
+# and results for perinatal survival outcomes (logical mask rather than -which(): with zero matches, x[-which(cond), ] returns zero rows instead of leaving the table untouched)
+combined_cleaned <- combined_cleaned[!(combined_cleaned$outcome_class %in% c("perinatal survival")),]
+
+# QC of cohort results (identify anything where there's evidence of the model not converging, probably because a single explanatory variable (exposure or covariate) uniquely identifies the outcome, i.e. perfect prediction/complete separation.)
+## evidence used here: a very high absolute effect estimate, i.e. |est| above the 99th percentile, with the cutoff computed separately for binary and continuous outcomes
+## NOTE(review): in the code visible here df_cleaned only feeds the lost_* summaries (exposures/outcomes/pairs that disappear after the cutoff); it is not written back to combined_cleaned, and the P-value is not part of the filter - confirm this is intended
+  df <- combined_cleaned
+  df_bin <- df[df$outcome_type=="binary",]
+  cutoff_bin <- quantile(abs(df_bin$est),probs = 0.99,na.rm = TRUE)
+  df_cont <- df[df$outcome_type=="continuous",]
+  cutoff_cont <- quantile(abs(df_cont$est),probs = 0.99,na.rm = TRUE)
+  df_cleaned <- df[which((df$outcome_type=="binary"& abs(df$est)<=cutoff_bin)|
+                     (df$outcome_type=="continuous"& abs(df$est)<=cutoff_cont)), ]
+  lost_exposures <- unique(df$exposure_linker[!(df$exposure_linker %in% df_cleaned$exposure_linker)])
+  lost_outcomes <- unique(df$outcome_linker[!(df$outcome_linker %in% df_cleaned$outcome_linker)])
+  # exposure-outcome pairs (pasted linkers) that no longer appear after applying the cutoffs
+  original_associations <-paste(df$exposure_linker,df$outcome_linker)
+  cleaned_associations <-paste(df_cleaned$exposure_linker,df_cleaned$outcome_linker)
+  lost_associations <- unique(original_associations[!(original_associations %in% cleaned_associations)])
+
+#drop dustmite, autism, and insect allergy because very small Ns leading to implausible effect estimates and SEs (logical mask rather than -which(), which would drop every row if nothing matched):
+combined_cleaned <- combined_cleaned[!(combined_cleaned$outcome_subclass2 %in% c("dustmite allergy","insect allergy","autism")),]
+#drop associations where very large SEs (at or above the 99.9th percentile) mean result is unreliable; the filtered table is assigned back to combined_cleaned so the saveRDS calls below actually write it (combined_cleaned2 is kept as an alias of the same result)
+cutoff <- quantile(combined_cleaned$se,probs = 0.999)
+print(cutoff)
+combined_cleaned2 <- combined_cleaned <- combined_cleaned[combined_cleaned$se<cutoff,]
+
 saveRDS(combined_cleaned,"~/University of Bristol/grp-EPoCH - Documents/EPoCH GitHub/all_results_reduced.rds")
-saveRDS(combined_cleaned,"/Users/gs8094/Library/CloudStorage/OneDrive-UniversityofExeter/Projects/EPoCH/EPoCH results app/rds/all_results_reduced.rds")
+saveRDS(combined_cleaned,"/Users/gs8094/Library/CloudStorage/OneDrive-UniversityofExeter/Projects/EPoCH/EPoCH results app/data/rds/all_results_reduced.rds")
 
 
 
diff --git a/meta_analysis/RUN_META_ANALYSIS.R b/meta_analysis/RUN_META_ANALYSIS.R
@@ -44,15 +44,11 @@ cohort_phewas <- lapply(1:length(cohorts),function(x){
   res <- readRDS(paste0(location_of_phewas_res,cohorts[x],"_",model,"_phewas.rds"))
   res <- res[,c("exposure","regression_term","outcome","est","se","p","n","exposure_n","outcome_n")]
   res <- res[which(res$regression_term!="error"),] # can remove this once we have sorted the issue with MCS
-#  if(cohorts[x]=="BIB_ALL"){
-#  res <- res[-grep("bmi_stage0_zscore|bmi_stage1_zscore|bmi_stage2_zscore|bmi_stage3_zscore|bmi_stage4_zscore",res$outcome),] # can remove this once we have sorted the issue with BIB  
-#  }
-    # if(cohorts[x]=="MOBA"){
-    # res <- res[-grep("cbcl|autism|aggression",res$outcome),] # can remove this once we have sorted the issue with MOBA 
-    # res <- res[-grep("phys",res$exposure),] # can remove this once we have sorted the issue with MOBA 
-    #  }
   res$cohort <- key_cohorts[x]
   key <- readRDS(paste0(location_of_key,tolower(key_cohorts[x]),"_key.rds"))
+  #just tidying up a bit due to (accidental) differences in the make_key process for MoBa and the other cohorts - but actually this column isn't needed for the meta-analysis
+  key$exposure_source <-"reported by self or study mother"
+  key$exposure_linker<-str_replace(key$exposure_linker,pattern="self-reported or measured|self-reported|reported by self or study mother","reported by self or study mother") # longest alternative first: alternation takes the first branch that matches, so listing "self-reported" first would leave a stray " or measured" behind
   res <- merge(res,key,by=c("exposure","outcome"),all.y=F)
   res$exposure_dose <-NA
   res$exposure_dose[grep("Heavy",res$regression_term)]<-"heavy"
@@ -71,9 +67,6 @@ print("combining cohort results in long format...")
 
 all_cohort_phewas_long <- bind_rows(cohort_phewas)
 
-#just tidying up a bit due to (accidental) differences in the make_key process for MoBa and the other cohorts - but actually this column isn't needed for the meta-analysis
-all_cohort_phewas_long$exposure_source <-NA
-
 print("combining cohort results in wide format...")
 
 all_cohort_phewas_wide <- pivot_wider(all_cohort_phewas_long,