-
Notifications
You must be signed in to change notification settings - Fork 1
/
SearchviaCombinedOntology.Rmd
91 lines (75 loc) · 3.05 KB
/
SearchviaCombinedOntology.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
---
title: "CrossOntology Term Parser"
author: "Krishna Udaiwal"
date: "12/11/2020"
output: html_document
---
```{r setup, include=FALSE}
# POST form request to API
library(RCurl)
# Parse XML response from API
library(XML)
# Parse JSON data
library(jsonlite)
# Data manipulation
library(tidyverse)
# Instructions on getting an API key are at http://www.bioontology.org/wiki/index.php/Annotator_User_Guide#Annotator_Web_service_Validation
# Without a valid API key the live requests will fail; the "Class Terms GET"
# chunk can still load previously saved results from
# input/Searchtoclasses_json.RData when that file exists.
# The key is taken from the BIOPORTAL_API_KEY environment variable when it is
# set and non-empty, so a real key does not have to be written into this file.
# Otherwise the placeholder below is used and must be replaced by hand.
yourAPIKey <- Sys.getenv("BIOPORTAL_API_KEY", unset = "")
if (!nzchar(yourAPIKey)) {
  yourAPIKey <- "ENTER_API_KEY_HERE"
}
# Term sent to the search endpoint.
searchTerm <- "Neurofibroma"
```
```{r functions}
# Fetch the record behind a resource link and parse the response.
#   link:   URL of the resource; "?apikey=<key>&include=all" is appended to it.
#   format: "xml" additionally appends "&format=xml" and parses the response
#           with xmlParse(); any other value adds no format parameter and
#           parses the response with fromJSON().
# Reads the global `yourAPIKey`.
getfullterm <- function(link, format = "json") {
  if (format != "xml") {
    response <- httpGET(paste0(link, "?apikey=", yourAPIKey, "&include=all"))
    return(fromJSON(response))
  }
  response <- httpGET(
    paste0(link, "?apikey=", yourAPIKey, "&include=all", "&format=xml")
  )
  xmlParse(response)
}
# Function cat_lists retrieved from: https://stackoverflow.com/a/57171812
# Merge two named lists name by name: for every name found in either list the
# matching elements are joined with c(). A name missing from one list
# contributes NULL there, so the other list's element is kept as is.
cat_lists <- function(list1, list2) {
  all_keys <- unique(c(names(list1), names(list2)))
  merged <- map2(list1[all_keys], list2[all_keys], c)
  set_names(merged, all_keys)
}
# Flatten a list with unlist() and return its distinct values.
listunique <- function(list) {
  flat_values <- unlist(list)
  unique(flat_values)
}
```
```{r}
# Query the search endpoint for `searchTerm` and keep the hits whose
# ontologyType is "ONTOLOGY".
# NOTE(review): the form field `inlcude` looks like a misspelling of `include`
# and is presumably ignored by the service. It is left unchanged because
# renaming it would alter the request; confirm which values the search
# endpoint accepts for `include` before correcting it.
res.search <- postForm(
  "http://data.bioontology.org/search",
  q = searchTerm,
  also_search_obsolete = FALSE,
  inlcude = "all",
  pagesize = 1000,
  apikey = yourAPIKey
)
search_hits <- fromJSON(res.search)$collection
# An error payload or an empty result set has no data-frame `collection`;
# stop with a readable message instead of failing inside filter().
if (!is.data.frame(search_hits) || nrow(search_hits) == 0) {
  stop(
    "Search returned no usable results for '", searchTerm,
    "'; check the API key and the search term.",
    call. = FALSE
  )
}
# any_of() drops whichever of these columns are present, so a response that
# omits one of them (for example no hit carries a cui) no longer errors.
res.json <- search_hits %>%
  filter(ontologyType == "ONTOLOGY") %>%
  select(-any_of(c(
    "obsolete", "ontologyType", "provisional",
    "@context", "cui", "semanticType"
  )))
res.json$links$`@context` <- NULL
```
```{r Class Terms GET}
# Fetch the full record for every search hit unless a saved copy exists in
# input/Searchtoclasses_json.RData, in which case that file is loaded instead.
# The saved file is expected to restore `rdfs` — TODO confirm its contents.
if (!file.exists("input/Searchtoclasses_json.RData")) {
  rdfs <- lapply(res.json$links$self, getfullterm)
} else {
  load("input/Searchtoclasses_json.RData")
}
```
```{r Combining Data based on Single specific term assumption}
# Fold every fetched class record into one list: cat_lists() joins the records
# field by field, so each field ends up holding the values from all records
# (this treats all hits as describing a single term).
combined_output <- reduce(rdfs, cat_lists)
# Apply the same field-wise merge to the collected `parents` entries, then
# remove them from the main list so they are kept separately.
combined_output.parents <- reduce(combined_output$parents, cat_lists)
combined_output$parents <- NULL
# Fields that are flattened to a vector of distinct values with listunique().
cols <- c("subClassOf", "definition", "synonym", "cui", "semanticType", "label")
combined_output[cols] <- lapply(combined_output[cols], listunique)
combined_output.parents[cols] <- lapply(combined_output.parents[cols], listunique)
# The complex fields hold both general and term-specific relational data
# belonging to the term's superclass; linking them back to each individual
# input value is future work. They are set aside unchanged here.
complex_cols <- c("properties", "links", "@context")
combined_output.complex <- combined_output[complex_cols]
combined_output[complex_cols] <- NULL
# De-duplicate every remaining field.
combined_output <- lapply(combined_output, unique)
# NOTE(review): these two calls repeat the unique() already applied above and
# look redundant — confirm before removing.
combined_output$id <- unique(combined_output$id)
combined_output$prefLabel <- unique(combined_output$prefLabel)
```
```{r}
# Write the combined results to disk.
# Each field of `combined_output` is appended to one CSV as its own table, so
# re-running this chunk adds to an existing file rather than replacing it.
# The directory is created first so the first write does not fail when
# output/ is missing.
dir.create("output", showWarnings = FALSE, recursive = TRUE)
# The argument stays named `x` because data.frame(x) uses that name as the
# column header written to the CSV. invisible() keeps the list of NULL return
# values out of the rendered document.
invisible(lapply(combined_output, function(x) {
  write.table(
    data.frame(x),
    "output/combined_output_.csv",
    append = TRUE,
    sep = ","
  )
}))
# Saved with saveRDS() despite the .RData extension: read it back with
# readRDS(), not load().
saveRDS(combined_output.complex, file = "output/combined_output.complex.RData")
```