Skip to content

Commit

Permalink
add ctd, mesh, medgen; preserve source ids in merged nodes; move conf…
Browse files Browse the repository at this point in the history
…ig to yaml
  • Loading branch information
jamesamcl authored Oct 13, 2024
1 parent 0d6c6de commit 12d5c59
Show file tree
Hide file tree
Showing 119 changed files with 1,764 additions and 1,266 deletions.
30 changes: 30 additions & 0 deletions 00_fetch_data/ctd/download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Download the CTD report files from ctdbase.org into the current directory.
#
# Every file is attempted even if an earlier one fails, so a single bad URL
# does not prevent the rest from being fetched. The script exits non-zero if
# any download failed, so that callers can detect an incomplete data set.

# pipefail: make the curl | gzip pipeline fail when curl fails, not only gzip.
set -uo pipefail

base_url="https://ctdbase.org/reports"

# Files that are stored exactly as served (no decompression).
reports=(
    CTD_anatomy.tsv.gz
    CTD_curated_genes_diseases.tsv.gz
    CTD_Disease-GO_biological_process_associations.tsv.gz
    CTD_Disease-GO_cellular_component_associations.tsv.gz
    CTD_Disease-GO_molecular_function_associations.tsv.gz
    CTD_exposure_events.tsv.gz
    CTD_exposure_studies.tsv.gz
    CTD_pheno_term_ixns.tsv.gz
    CTD_Phenotype-Disease_biological_process_associations.tsv.gz
    CTD_Phenotype-Disease_cellular_component_associations.tsv.gz
    CTD_Phenotype-Disease_molecular_function_associations.tsv.gz
    CTD_chemicals.tsv.gz
    CTD_chem_go_enriched.tsv.gz
    CTD_chemicals_diseases.tsv.gz
    CTD_chem_gene_ixns.tsv.gz
    CTD_chem_pathways_enriched.tsv.gz
    CTD_curated_cas_nbrs.tsv.gz
    CTD_diseases_pathways.tsv.gz
    CTD_UniProtToCTDIdMapping.txt.gz
    CTD_genes.tsv.gz
    CTD_genes_diseases.tsv.gz
    CTD_genes_pathways.tsv.gz
    CTD_pathways.tsv.gz
    CTD_chem_gene_ixn_types.obo
)

failed=0

for report in "${reports[@]}"; do
    if ! wget "${base_url}/${report}"; then
        echo "download failed: ${base_url}/${report}" >&2
        failed=1
    fi
done

# The diseases ontology is fetched gzipped and stored decompressed.
# --fail makes curl return an error on an HTTP failure instead of handing the
# server's error page to gzip.
if ! curl --fail "${base_url}/CTD_diseases.obo.gz" | gzip -d > CTD_diseases.obo; then
    echo "download failed: ${base_url}/CTD_diseases.obo.gz" >&2
    # Do not leave an empty or truncated ontology behind.
    rm -f CTD_diseases.obo
    failed=1
fi

exit "${failed}"

11 changes: 11 additions & 0 deletions 00_fetch_data/medgen/fetch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Fetch a pinned medgen.obo release from the monarch-initiative/medgen GitHub
# releases, convert it to OWL with `robot convert`, and gzip the result to
# medgen.owl.gz in the current directory.
#
# Each step consumes the previous step's output, so stop at the first failure
# instead of carrying on and exiting 0 with no usable output.
set -euo pipefail

# Release tag of monarch-initiative/medgen to download.
medgen_release="2024-10-06"

# Remove leftovers from a previous run: otherwise wget would save the download
# under a numbered name (medgen.obo.1) and gzip would not overwrite an
# existing medgen.owl.gz.
rm -f medgen.obo medgen.owl medgen.obo.gz medgen.owl.gz

wget "https://github.com/monarch-initiative/medgen/releases/download/${medgen_release}/medgen.obo"
robot convert --input medgen.obo --output medgen.owl

# gzip replaces medgen.owl with medgen.owl.gz; the intermediate OBO file is
# no longer needed once the conversion has succeeded.
gzip -9 medgen.owl
rm -f medgen.obo


5 changes: 5 additions & 0 deletions 00_fetch_data/mesh/fetch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# Download the 2024 MeSH RDF dump and its vocabulary file from
# nlmpubs.nlm.nih.gov into the current directory.
#
# Both files are attempted; the script exits non-zero if either download
# failed, so a failure of the first file is not hidden by the second.
set -uo pipefail

base_url="https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/2024"

files=(
    mesh2024.nt.gz
    vocabulary_1.0.0.ttl
)

failed=0

for file in "${files[@]}"; do
    if ! wget "${base_url}/${file}"; then
        echo "download failed: ${base_url}/${file}" >&2
        failed=1
    fi
done

exit "${failed}"

6 changes: 0 additions & 6 deletions 01_ingest/grebi_ingest_kgx_edges/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@ use serde_json::json;
#[command(author, version, about, long_about = None)]
struct Args {

#[arg(long)]
datasource_name: String,

#[arg(long)]
filename: String,

#[arg(long)]
kgx_rename_field:Option<Vec<String>>,

Expand Down
30 changes: 24 additions & 6 deletions 01_ingest/grebi_ingest_ols/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ struct Args {
#[arg(long)]
datasource_name: String,

#[arg(long)]
filename: String,

#[arg(long)]
ontologies:String,

Expand Down Expand Up @@ -236,12 +233,33 @@ fn read_entities(json: &mut JsonStreamReader<BufReader<StdinLock<'_>>>, output_n
continue;
}

let v = obj.get(k).unwrap();

output_nodes.write_all(r#","#.as_bytes()).unwrap();
output_nodes.write_all(r#"""#.as_bytes()).unwrap();
output_nodes.write_all(k.as_bytes()).unwrap();// already reprefixed on load
output_nodes.write_all(r#"":"#.as_bytes()).unwrap();

let v = obj.get(k).unwrap();
if k.eq("ols:relatedFrom") || k.eq("ols:relatedTo") {
let vals = {
if v.is_array() {
v.as_array().unwrap().iter().map(|x| x).collect()
} else {
vec!(v)
}
};
for related in vals {
let v_as_obj = related.as_object().unwrap();
let pred = v_as_obj.get("http://www.w3.org/2002/07/owl#onProperty");
if pred.is_some() {
output_nodes.write_all(pred.unwrap().as_str().unwrap().as_bytes()).unwrap();
} else {
output_nodes.write_all(k.as_bytes()).unwrap();
}
}
} else {
output_nodes.write_all(k.as_bytes()).unwrap();
}

output_nodes.write_all(r#"":"#.as_bytes()).unwrap();

output_nodes.write_all(r#"["#.as_bytes()).unwrap();
if v.is_array() {
Expand Down
13 changes: 0 additions & 13 deletions 01_ingest/grebi_ingest_reactome/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,8 @@ use grebi_shared::prefix_map::PrefixMapBuilder;
use serde_json::json;
use serde_json::Value;

#[derive(clap::Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {

#[arg(long)]
datasource_name: String,

#[arg(long)]
filename: String,
}

fn main() {

let args = Args::parse();

let stdin = io::stdin().lock();
let mut reader = BufReader::new(stdin);

Expand Down
15 changes: 0 additions & 15 deletions 01_ingest/grebi_ingest_sssom/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,11 @@ use grebi_shared::prefix_map::PrefixMapBuilder;
use serde_json::json;
use serde_yaml;

#[derive(clap::Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {

#[arg(long)]
datasource_name: String,

#[arg(long)]
filename: String,
}

fn main() {

let args = Args::parse();

let stdin = io::stdin().lock();
let mut reader = BufReader::new(stdin);

let datasource_name = args.datasource_name.as_str();

let stdout = io::stdout().lock();
let mut output_nodes = BufWriter::new(stdout);

Expand Down
114 changes: 0 additions & 114 deletions 01_ingest/grebi_ingest_tsv/src/main.rs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "grebi_ingest_json"
name = "grebi_nodes2edges"
version = "0.1.0"
edition = "2021"

Expand All @@ -10,3 +10,4 @@ serde_json = { version = "1.0.108", features=["preserve_order"] }
struson = "0.3.0"
jemallocator = "0.5.4"


Loading

0 comments on commit 12d5c59

Please sign in to comment.