-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfilterMIFindings.R
198 lines (190 loc) · 10.5 KB
/
filterMIFindings.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
###################################################################################
# Script name : filterMIFindings.R
# Date Created : 12-Mar-2020
# Documentation : <if relevant, reference to specification document>
# Programmer : Bo Larsen
# --------------------------------------------------------------------------------
# Change log:
# Programmer/date Description
# ----------------- ------------------------------------------------------------
# <init/dd-Mon-yyyy> <description>
#
# -------------------------------------------------------------------------------
# Purpose : Extract a set of MI findings for specified MISTRESC and/or MISPEC
# value(S)
#
# Description : The function FilterMIFindings extracts and returns a set of
# MI findings from an input set of MI findings which match the
# criteria specified in the input parameters mistrescVal and/or
# mispecVal.
# If a value is specified for input parameter mistrescVal, the set of
# rows in the input table is extracted where MISTRESC matches the specified
# parameter value.
# If a value is specified for input parameter mispecVal, the set of
# rows in the input table is extracted where MISPEC matches the specified
# parameter value.
# If both of these input parameters are specified, the set of rows in
# the input table is extracted where both MISTRESC and MISPEC match
# the respectively input parameter values.
# If the specified input value contains one or more *, the filtering of
# the variable is done as a regular expression search where each *
# represents zero or more characters - else the filtering is done
# as a simple 'equal to'.
# The filtering of MISTRESC/MISPEC values is done case insensitive in
# all cases.
# If the value of input parameter mistrescVal.negate/mispecVal.negate
# is TRUE, the filtering using the condition in mistrescVal/mispecVal
# is negated - i.e. the rows where MISTRESC/MISPEC doesn't match the
# condition are extracted.
#
# Input : A data table containing the input set of MI findings (input parameter
# mi). It is expected that it contains all MI variables - but this
# is the required minimum set of variables (used by the function) in the table:
# - MISTRESC - character
# - MISPEC - character
#
# Output : A data table containing the rows matching the specified input values
# of MISTRESC and/or MISPEC in the input data table.
# The data table contains the same variables as the input data table.
#
# Parameters : The function is defined with these input parameters:
# mi: Mandatory, data table
# The data table with the MI findings to filter
# mistrescVal: Optional *), character
# The MISTRESC value to filter for
# mispecVal: Optional *), character
# The MISPEC value to filter for
# mistrescVal.negate:
# Optional, boolean, default=FALSE
# Specifies whether the condition specified in mistrescVal
# shall be negated or not
# mispecVal.negate:
# Optional, boolean, default=FALSE
# Specifies whether the condition specified in mispecVal
# shall be negated or not
# *) Either mistrescVal, mispecVal or both must be specified
#
# Usage notes : Examples:
# - Get findings for specimen 'BRAIN':
# FilterMIFindings(dosingMI, mispecVal="brain")
# - Get findings where specimen starts with 'GLAND, SALIVARY' and MISTRESC is different from 'NORMAL':
# FilterMIFindings(dosingMI, mistrescVal="normal", mispecVal="gland, SALIVARY*", mistrescVal.negate=TRUE)
# - Get findings where value of MISTRESC starts with 'increased':
# FilterMIFindings(dosingMI, mistrescVal="increased*")
#
# MISSING:
# - Handling of pooled data
###################################################################################
library(data.table)
library(stringr)
# Extract the rows of a set of MI findings where MISTRESC and/or MISPEC match
# the given value(s). Matching is always case insensitive.
#
# Arguments:
#   mi                 - data table (a plain data frame is accepted too) with
#                        the MI findings; must contain the column(s) that are
#                        filtered on (MISTRESC and/or MISPEC)
#   mistrescVal        - value to match against MISTRESC, NULL = no MISTRESC filter
#   mistrescVal.negate - TRUE = keep rows NOT matching mistrescVal
#   mispecVal          - value to match against MISPEC, NULL = no MISPEC filter
#   mispecVal.negate   - TRUE = keep rows NOT matching mispecVal
#
# A value containing one or more * is matched as an anchored regular expression
# in which each * stands for zero or more characters; any other regular
# expression syntax in such a value is passed on to grepl unchanged. A value
# without * is compared with a simple 'equal to'.
#
# Returns the matching rows with the same columns as mi. If neither mistrescVal
# nor mispecVal is given, an error message is printed (and returned) instead.
FilterMIFindings <- function(mi,
                             mistrescVal = NULL, mistrescVal.negate = FALSE,
                             mispecVal = NULL, mispecVal.negate = FALSE) {
  if (is.null(mistrescVal) && is.null(mispecVal)) {
    print("ERROR: A mistrescVal and/or mispecVal to filter for must be specified")
  } else {
    # Compute the logical row mask for one column/value pair
    matchColumn <- function(colName, val, negate) {
      if (!(colName %in% names(mi))) {
        stop("Column ", colName, " not found in the input table", call. = FALSE)
      }
      colValues <- mi[[colName]]
      if (grepl("*", val, fixed = TRUE)) {
        # Wildcard included in condition - turn each * into .* and anchor the
        # expression so the whole value has to match
        regex <- paste0("^", gsub("*", ".*", val, fixed = TRUE), "$")
        hit <- grepl(regex, colValues, ignore.case = TRUE)
      } else {
        # Search for exact match (case insensitive)
        hit <- tolower(colValues) == tolower(val)
      }
      if (negate) !hit else hit
    }

    # Combine the conditions for the specified column(s)
    keepRows <- rep(TRUE, nrow(mi))
    if (!is.null(mispecVal)) {
      keepRows <- keepRows & matchColumn("MISPEC", mispecVal, mispecVal.negate)
    }
    if (!is.null(mistrescVal)) {
      keepRows <- keepRows & matchColumn("MISTRESC", mistrescVal, mistrescVal.negate)
    }
    # A comparison against a missing value yields NA - such rows are not extracted
    keepRows[is.na(keepRows)] <- FALSE

    # Execute the filtering and return extracted rows
    if (inherits(mi, "data.table")) {
      mi[keepRows]
    } else {
      mi[keepRows, , drop = FALSE]
    }
  }
}
######################################################################################################################################
#
# Alternative version of the function using agrep to do approximate matches
# Examples:
# - Get findings for specimen 'BRAIN':
# FilterMIFindingsApprox(dosingMI, mispecVal="brain")
# - Get findings where specimen starts with 'GLAND, SALIVARY' and MISTRESC is different from 'NORMAL':
# FilterMIFindingsApprox(dosingMI, mistrescVal="normal", mispecVal="gland, SALIVARY*", mistrescVal.negate=TRUE)
# - Get findings where value of MISTRESC starts with 'increased':
# FilterMIFindingsApprox(dosingMI, mistrescVal="increased*")
########################################################################################################################################
# Extract the rows of a set of MI findings where MISTRESC and/or MISPEC
# approximately match the given value(s), using agrepl (case insensitive).
#
# Arguments:
#   mi                 - data table (a plain data frame is accepted too) with
#                        the MI findings; must contain the column(s) that are
#                        filtered on (MISTRESC and/or MISPEC)
#   mistrescVal        - value to match against MISTRESC, NULL = no MISTRESC filter
#   mistrescVal.negate - TRUE = keep rows NOT matching mistrescVal
#   mispecVal          - value to match against MISPEC, NULL = no MISPEC filter
#   mispecVal.negate   - TRUE = keep rows NOT matching mispecVal
#
# Each * in a value is turned into .* and the value is passed to agrepl as a
# regular expression (fixed = FALSE) enclosed in ^ and $, using agrepl's
# default max.distance.
#
# Returns the matching rows with the same columns as mi. If neither mistrescVal
# nor mispecVal is given, an error message is printed (and returned) instead.
FilterMIFindingsApprox <- function(mi,
                                   mistrescVal = NULL, mistrescVal.negate = FALSE,
                                   mispecVal = NULL, mispecVal.negate = FALSE) {
  if (is.null(mistrescVal) && is.null(mispecVal)) {
    print("ERROR: A mistrescVal and/or mispecVal to filter for must be specified")
  } else {
    # Compute the logical row mask for one column/value pair.
    # agrepl is called directly on the column instead of pasting the value into
    # generated code, so values containing quotes or backslashes are handled.
    matchColumn <- function(colName, val, negate) {
      if (!(colName %in% names(mi))) {
        stop("Column ", colName, " not found in the input table", call. = FALSE)
      }
      # Include start/end of line indications to ensure we don't return more
      # rows than the ones matching the intended condition
      regex <- paste0("^", gsub("*", ".*", val, fixed = TRUE), "$")
      hit <- agrepl(regex, mi[[colName]], ignore.case = TRUE, fixed = FALSE)
      if (negate) !hit else hit
    }

    # Combine the conditions for the specified column(s)
    keepRows <- rep(TRUE, nrow(mi))
    if (!is.null(mispecVal)) {
      keepRows <- keepRows & matchColumn("MISPEC", mispecVal, mispecVal.negate)
    }
    if (!is.null(mistrescVal)) {
      keepRows <- keepRows & matchColumn("MISTRESC", mistrescVal, mistrescVal.negate)
    }
    # Rows with an undetermined (NA) condition are not extracted
    keepRows[is.na(keepRows)] <- FALSE

    # Execute the filtering and return extracted rows
    if (inherits(mi, "data.table")) {
      mi[keepRows]
    } else {
      mi[keepRows, , drop = FALSE]
    }
  }
}