From 37effaa2794f6e45e68782e64b8442428a3298c6 Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Wed, 11 Sep 2024 17:12:00 +0100 Subject: [PATCH] update and add more sssom mappings --- 00_fetch_data/sssom/fetch.sh | 8 ++++++++ 01_ingest/grebi_ingest_sssom/src/main.rs | 13 +++++++------ .../datasource_configs/{mp_hp.json => sssom.json} | 4 ++-- configs/subgraph_configs/ebi_full_monarch.json | 3 +-- configs/subgraph_configs/hett.json | 1 - configs/subgraph_configs/hra_kg.json | 1 - configs/subgraph_configs/monarch.json | 1 - grebi_shared/src/prefix_map.rs | 2 +- 8 files changed, 19 insertions(+), 14 deletions(-) create mode 100755 00_fetch_data/sssom/fetch.sh rename configs/datasource_configs/{mp_hp.json => sssom.json} (73%) diff --git a/00_fetch_data/sssom/fetch.sh b/00_fetch_data/sssom/fetch.sh new file mode 100755 index 0000000..c0b2882 --- /dev/null +++ b/00_fetch_data/sssom/fetch.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +rm -f *.sssom.tsv.gz + +curl -L https://data.monarchinitiative.org/mappings/latest/upheno_custom.sssom.tsv | gzip > upheno_custom.sssom.tsv.gz +curl -L https://raw.githubusercontent.com/mapping-commons/mh_mapping_initiative/master/mappings/mp_hp_mgi_all.sssom.tsv | gzip > mp_hp_mgi_all.sssom.tsv.gz +curl -L https://raw.githubusercontent.com/obophenotype/bio-attribute-ontology/master/src/mappings/oba-efo.sssom.tsv | gzip > oba-efo.sssom.tsv.gz +curl -L https://raw.githubusercontent.com/obophenotype/bio-attribute-ontology/master/src/mappings/oba-vt.sssom.tsv | gzip > oba-vt.sssom.tsv.gz diff --git a/01_ingest/grebi_ingest_sssom/src/main.rs b/01_ingest/grebi_ingest_sssom/src/main.rs index c7cb96c..921b892 100644 --- a/01_ingest/grebi_ingest_sssom/src/main.rs +++ b/01_ingest/grebi_ingest_sssom/src/main.rs @@ -52,14 +52,15 @@ fn main() { return line.trim_start_matches("#").to_string(); }).collect::>().join("\n"); - let yaml_header:serde_yaml::Value = serde_yaml::from_str::(yaml.as_str()).unwrap(); - - let yaml_header_curie_map = yaml_header.get("curie_map").unwrap().as_mapping().unwrap(); - + let expand:PrefixMap = { let mut builder = PrefixMapBuilder::new(); - for (k, v) in yaml_header_curie_map { - builder.add_mapping(k.as_str().unwrap().to_string() + ":", v.as_str().unwrap().to_string()); + let yaml_header:serde_yaml::Value = serde_yaml::from_str::(yaml.as_str()).unwrap(); + let yaml_header_curie_map = yaml_header.get("curie_map"); + if yaml_header_curie_map.is_some() { + for (k, v) in yaml_header_curie_map.unwrap().as_mapping().unwrap() { + builder.add_mapping(k.as_str().unwrap().to_string() + ":", v.as_str().unwrap().to_string()); + } } builder.build() }; diff --git a/configs/datasource_configs/mp_hp.json b/configs/datasource_configs/sssom.json similarity index 73% rename from configs/datasource_configs/mp_hp.json rename to configs/datasource_configs/sssom.json index 17b5a21..0e8b206 100644 --- a/configs/datasource_configs/mp_hp.json +++ b/configs/datasource_configs/sssom.json @@ -1,10 +1,10 @@ { - "name": "MP_HP", + "name": "SSSOM", "enabled": true, "ingests": [ { "ingest_files": [ - "./00_fetch_data/sssom/mphp.sssom.tsv" + "./00_fetch_data/sssom/*.tsv.gz" ], "ingest_script": "./target/release/grebi_ingest_sssom", "ingest_args": [] diff --git a/configs/subgraph_configs/ebi_full_monarch.json b/configs/subgraph_configs/ebi_full_monarch.json index 0681854..3acebad 100644 --- a/configs/subgraph_configs/ebi_full_monarch.json +++ b/configs/subgraph_configs/ebi_full_monarch.json @@ -19,7 +19,6 @@ "skos:exactMatch", "ncit:P368", "ncit:C98965", - "cheminf:000407", "dcterms:identifier", "oboinowl:hasAlternativeId", "semapv:crossSpeciesExactMatch" @@ -65,7 +64,7 @@ "./configs/datasource_configs/gwas.json", "./configs/datasource_configs/hgnc.json", "./configs/datasource_configs/impc.json", - "./configs/datasource_configs/mp_hp.json", + "./configs/datasource_configs/sssom.json", "./configs/datasource_configs/ols.json", "./configs/datasource_configs/reactome.json", "./configs/datasource_configs/ubergraph.json", diff --git a/configs/subgraph_configs/hett.json b/configs/subgraph_configs/hett.json index c8cb4e5..c15cb01 100644 --- a/configs/subgraph_configs/hett.json +++ b/configs/subgraph_configs/hett.json @@ -20,7 +20,6 @@ "skos:exactMatch", "ncit:P368", "ncit:C98965", - "cheminf:000407", "dcterms:identifier", "oboinowl:hasAlternativeId" ], diff --git a/configs/subgraph_configs/hra_kg.json b/configs/subgraph_configs/hra_kg.json index fad9ab4..6d680ad 100644 --- a/configs/subgraph_configs/hra_kg.json +++ b/configs/subgraph_configs/hra_kg.json @@ -20,7 +20,6 @@ "skos:exactMatch", "ncit:P368", "ncit:C98965", - "cheminf:000407", "dcterms:identifier", "oboinowl:hasAlternativeId" ], diff --git a/configs/subgraph_configs/monarch.json b/configs/subgraph_configs/monarch.json index 6541ec2..5e99593 100644 --- a/configs/subgraph_configs/monarch.json +++ b/configs/subgraph_configs/monarch.json @@ -20,7 +20,6 @@ "skos:exactMatch", "ncit:P368", "ncit:C98965", - "cheminf:000407", "dcterms:identifier", "oboinowl:hasAlternativeId" ], diff --git a/grebi_shared/src/prefix_map.rs b/grebi_shared/src/prefix_map.rs index 2c060cc..2108ca1 100644 --- a/grebi_shared/src/prefix_map.rs +++ b/grebi_shared/src/prefix_map.rs @@ -129,7 +129,7 @@ Node { #[inline(always)] fn reprefix_impl<'a>(subject:&[u8], buf:&[u8]) -> Option> { - if subject.len() == 0 { + if subject.len() == 0 || buf.len() == 0 { return None; }