-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCtReader.R
210 lines (184 loc) · 6.54 KB
/
CtReader.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
### Functions to read and get CT
# Currently available CT versions.
# Each entry is a release date written as yyyy-mm-dd, listed oldest first and
# grouped below by release year.
# NOTE(review): presumably these are the values accepted as `version` by
# importCT() / getCTDF() - confirm against the callers.
CTVersions <- c(
  # 2011
  "2011-06-10", "2011-07-22", "2011-12-09",
  # 2012
  "2012-01-02", "2012-03-23", "2012-08-03", "2012-12-21",
  # 2013
  "2013-04-12", "2013-06-28", "2013-10-04", "2013-12-20",
  # 2014
  "2014-03-28", "2014-06-27", "2014-09-26", "2014-12-19",
  # 2015
  "2015-03-27", "2015-06-26", "2015-09-25", "2015-12-18",
  # 2016
  "2016-03-25", "2016-06-24", "2016-09-30", "2016-12-16",
  # 2017
  "2017-03-31", "2017-06-30", "2017-09-29", "2017-12-22",
  # 2018
  "2018-03-30", "2018-06-29", "2018-09-28", "2018-12-21",
  # 2019
  "2019-03-29", "2019-06-28", "2019-09-27", "2019-12-20",
  # 2020
  "2020-03-27", "2020-05-08", "2020-06-26", "2020-09-25", "2020-11-06",
  "2020-12-18",
  # 2021
  "2021-03-26", "2021-06-25"
)
## Read a worksheet from a workbook that is downloaded on first use.
##
## The workbook is cached as <sourceDir>/downloads/<aName>; later calls reuse
## the cached copy instead of downloading again.
##
## aLocation  base URL (no trailing slash) the workbook is served from
## aName      file name of the workbook; appended to aLocation and also used
##            as the name of the cached copy
## aSheet     worksheet selector handed to readWorksheetFromFile()
##
## Returns whatever readWorksheetFromFile() returns for that sheet.
## Stops with an error when the download fails.
readWorksheetFromURL <- function(aLocation, aName, aSheet) {
  subdir <- "downloads"
  createOutputDirectory(sourceDir, subdir)
  aTarget <- paste(sourceDir, subdir, aName, sep = "/")
  aURL <- paste(aLocation, aName, sep = "/")
  # get file if not already downloaded
  if (!file.exists(aTarget)) {
    # A failed or interrupted transfer can leave a partial file behind. The
    # file.exists() check above would then treat it as a valid cached copy on
    # every later call, so remove it before reporting the failure.
    status <- tryCatch(
      download.file(aURL, aTarget, mode = "wb"),
      error = function(e) {
        unlink(aTarget)
        stop(e)
      }
    )
    # download.file() reports success as 0; some methods signal failure only
    # through a non-zero return code.
    if (!identical(as.integer(status), 0L)) {
      unlink(aTarget)
      stop("Download of ", aURL, " failed with status ", status, call. = FALSE)
    }
  }
  readWorksheetFromFile(aTarget, aSheet)
}
## Read in a CT file. This should only be called from the getCT function.
##
## The workbook for `version` is looked up as
## <sourceDir>/downloads/CT/<version>.xls. When it is missing it is downloaded
## from the NCI archive first and kept there for later calls.
##
## version  CT version string; used in the cached file name, in the remote
##          file name and in the worksheet name "SEND Terminology <version>"
##
## Returns the worksheet contents with a "version" attribute set to `version`.
## Stops with an error when the download fails or the server answers with an
## HTTP error status; in both cases nothing is left in the cache.
importCT <- function(version) {
  CTDownloadsDir <- paste0(sourceDir, "/downloads/CT/")
  CTxl <- paste0(CTDownloadsDir, version, ".xls")
  if (file.exists(CTxl)) {
    print(paste0("CT Loading... from ", CTDownloadsDir, version, ".xls"))
  } else {
    print("CT Downloading...")
    # Create directory if not there
    createOutputDirectory(sourceDir, "downloads")
    createOutputDirectory(paste0(sourceDir, "/downloads"), "CT")
    # Reads directly from the NCI location
    base <- "https://evs.nci.nih.gov/ftp1/CDISC/SEND/Archive/"
    path <- paste0(base, "SEND%20Terminology%20", version, ".xls")
    print(paste0(CTDownloadsDir, version))
    print(paste0("CT Downloading the file... ", path))
    # write_disk() stores the response body even when the request fails, so a
    # timeout or an error page would otherwise be cached under the .xls name
    # and picked up by the file.exists() check on every later call.
    response <- tryCatch(
      GET(path, write_disk(CTxl), timeout(20)),
      error = function(e) {
        unlink(CTxl)
        stop(e)
      }
    )
    if (http_error(response)) {
      unlink(CTxl)
      stop("Could not download CT version ", version, " from ", path,
           call. = FALSE)
    }
  }
  df <- readWorksheet(loadWorkbook(CTxl),
                      sheet = paste0("SEND Terminology ", version))
  # Attribute used to determine if user changes CT version.
  attr(df, "version") <- version
  df
}
# Return the rows of the CT data frame that belong to one codelist.
#
# codelist  either a codelist name, or a codelist submission value wrapped in
#           parentheses (e.g. "(XYZ)"), which is first translated to the
#           matching codelist name. Matching is case-insensitive.
# version   CT version; the CT data frame is (re)loaded through importCT()
#           and cached in `CTdf` whenever no CT is loaded yet or the loaded
#           one carries a different "version" attribute.
#
# Returns a data frame holding the rows of the requested codelist, without
# the rows whose Codelist.Code is NA (the codelist header row). An unknown
# codelist gives a data frame with zero rows.
getCTDF <<- function(codelist, version) {
  # If CT hasn't been loaded in already, superassign to parent environment.
  # isTRUE() also covers a cached object without a "version" attribute, where
  # the comparison has length zero.
  if (!exists("CTdf") || !isTRUE(attr(CTdf, "version") == version)) {
    CTdf <<- importCT(version)
  }
  # A leading parenthesis marks the submission value of a codelist rather
  # than its name.
  if (isTRUE(startsWith(as.character(codelist)[1], "("))) {
    # strip the surrounding parentheses
    aValue <- substr(codelist, 2, nchar(codelist) - 1)
    # find name from submission value; which() skips rows whose submission
    # value is NA, which would otherwise be selected as all-NA rows and could
    # hide a real match behind them
    nameRow <- which(toupper(CTdf$CDISC.Submission.Value) == toupper(aValue))[1]
    codelist <- CTdf$Codelist.Name[nameRow]
  }
  # Return the requested codelist, remove the codelist header row. which()
  # drops NA comparison results so that no all-NA rows end up in the result.
  keep <- which(
    toupper(CTdf$Codelist.Name) == toupper(codelist) &
      !is.na(CTdf$Codelist.Code)
  )
  CTdf[keep, ]
}
# Pick one entry of a codelist at random.
#
# nameList  codelist selector, passed unchanged to getCTDF() together with
#           the global gCTVersion
#
# Returns the CDISC.Submission.Value of one randomly sampled row.
CTRandomName <<- function(nameList) {
  candidates <- getCTDF(nameList, gCTVersion)
  # draw a single row index, then take that row
  pick <- sample(nrow(candidates), 1)
  chosen <- candidates[pick, ]
  # return the name
  chosen$CDISC.Submission.Value[1]
}
# Look up the name that belongs to a CT code number.
#
# nameList  codelist selector, passed unchanged to getCTDF() together with
#           the global gCTVersion
# aCode     value compared against the Code column of that codelist
#
# Returns the CDISC.Submission.Value of the first selected row (NA when no
# row is selected).
CTSearchOnCode <<- function(nameList, aCode) {
  codelistRows <- getCTDF(nameList, gCTVersion)
  matched <- codelistRows[codelistRows$Code == aCode, ]
  matched$CDISC.Submission.Value[1]
}
# Return the code number given a CT name.
#
# nameList  codelist selector, passed unchanged to getCTDF() together with
#           the global gCTVersion
# aName     value compared against the CDISC.Name column of that codelist
#
# Returns the Codelist.Code of the first matching row, or NA when nothing
# matches.
#
# NOTE(review): the other lookups in this file key on CDISC.Submission.Value
# and CTSearchOnShortName() returns Code rather than Codelist.Code - confirm
# that CDISC.Name and Codelist.Code are the intended columns here.
CTSearchOnName <<- function(nameList, aName) {
  # Keep the codelist local; assigning it with <<- left a stray `aSet`
  # behind in the global environment on every call.
  aSet <- getCTDF(nameList, gCTVersion)
  # which() ignores rows whose name is NA, so such a row can no longer hide a
  # real match further down.
  firstMatch <- which(aSet$CDISC.Name == aName)[1]
  aSet$Codelist.Code[firstMatch]
}
# Return the code number given a CT short name (submission value).
#
# nameList  codelist selector, passed unchanged to getCTDF() together with
#           the global gCTVersion
# aName     value compared against the CDISC.Submission.Value column of that
#           codelist
#
# Returns the Code of the first matching row, or NA when nothing matches.
CTSearchOnShortName <<- function(nameList, aName) {
  # Keep the codelist local; assigning it with <<- left a stray `aSet`
  # behind in the global environment on every call.
  aSet <- getCTDF(nameList, gCTVersion)
  # which() ignores rows whose submission value is NA, so such a row can no
  # longer hide a real match further down.
  firstMatch <- which(aSet$CDISC.Submission.Value == aName)[1]
  aSet$Code[firstMatch]
}
# Look up the code list to use for a column from the SENDIG.
#
# aCol  value compared against the Column field of the global dfSENDIG table
#
# Returns the Codelist entry of the first selected row (NA when no row is
# selected).
getCodeList <- function(aCol) {
  columnRows <- dfSENDIG[dfSENDIG$Column == aCol, ]
  columnRows$Codelist[1]
}
# Register a test code selection for a column.
#
# aCol     column the test code belongs to; its code list comes from
#          getCodeList() unless the same column was queried on the previous
#          call, in which case the remembered code list is reused
# aTestCD  test code to use
#
# Side effects (globals):
#   lastTestCode         the CT code found for aTestCD by
#                        CTSearchOnShortName(), or aTestCD itself when the
#                        column has no code list
#   lastColumnQueriedCD  set to aCol
#   lastNameListCD       set to the code list used
#
# Returns aTestCD as a character string.
getSENDTestCode <- function(aCol, aTestCD) {
  # if same column as last call, no need to look up namelist again
  if (lastColumnQueriedCD != aCol) {
    nameList <- getCodeList(aCol)
  } else {
    nameList <- lastNameListCD
    printDebug(paste("in getSENDLastTestCode, using last namelist again", aCol, lastNameListCD))
  }
  # A column without a code list can come back as NA rather than "" or NULL.
  # Treat that as "no code list" too; without the is.na() guard the condition
  # itself would be NA and abort the call.
  hasCodeList <- !is.null(nameList) && !is.na(nameList) && nchar(nameList) > 0
  if (hasCodeList) {
    # retain last test code so as to match next request for test code name
    lastTestCode <<- CTSearchOnShortName(nameList, aTestCD)
  } else {
    # for some domains, this must come from a configuration file
    lastTestCode <<- aTestCD
  }
  # retain last column,namelist used
  lastColumnQueriedCD <<- aCol
  lastNameListCD <<- nameList
  # pass back same set code
  as.character(aTestCD)
}
# Retrieve the test name matching the last test code (global lastTestCode).
#
# aCol     column whose code list should be searched; the code list comes
#          from getCodeList() unless the same column was used for the
#          previous lookup, in which case the remembered one is reused
# aDomain  domain prefix; only used when the column has no code list, to
#          read the name through getMatchColumn() using the columns
#          <aDomain>TESTCD and <aDomain>TEST
#
# When lastTestCode equals the code of the previous lookup, the remembered
# name is returned without searching again.
# NOTE(review): that shortcut compares the code only, not aCol / aDomain -
# confirm the same code can never mean different names in two domains.
#
# Side effects (globals, only when a new lookup is made): lastColumnQueried,
# lastNameList, lastTestCodeLookup and lastTestNameResult are updated.
#
# Returns the test name found.
getSENDLastTestCodeName <- function(aCol, aDomain) {
  # if this test name was already looked up, just use it again.
  # lastTestCode is NA when the preceding code search found nothing; isTRUE()
  # turns that into a fresh lookup instead of an NA condition that aborts.
  if (isTRUE(lastTestCodeLookup == lastTestCode)) {
    aValue <- lastTestNameResult
    printDebug(paste("in getSENDLastTestCodeName , using last lookup again", aCol, lastTestCode))
  } else {
    # if same column as last call, no need to look up namelist again
    if (lastColumnQueried != aCol) {
      nameList <- getCodeList(aCol)
    } else {
      printDebug(paste("in getSENDLastTestCodeName , using last namelist again", aCol, lastNameList))
      nameList <- lastNameList
    }
    # A column without a code list can come back as NA rather than "" or
    # NULL; treat that as "no code list" as well.
    hasCodeList <- !is.null(nameList) && !is.na(nameList) && nchar(nameList) > 0
    if (hasCodeList) {
      aValue <- CTSearchOnCode(nameList, lastTestCode)
    } else {
      # some domains, this must come from a configuration file
      aValue <- getMatchColumn(aDomain, paste0(aDomain, "TESTCD"), lastTestCode, paste0(aDomain, "TEST"))
    }
    # retain last column,namelist used
    lastColumnQueried <<- aCol
    lastNameList <<- nameList
    # retain last found name, to use again if same code number as last time
    lastTestCodeLookup <<- lastTestCode
    lastTestNameResult <<- aValue
  } # end of check on using same value as last time
  aValue
}