Skip to content

Commit cf6b3fa

Browse files
committed
Put some bounds checks on various tests in a quote annotator sieve
1 parent 12dfb70 · commit cf6b3fa

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

src/edu/stanford/nlp/quoteattribution/Sieves/QMSieves/TrigramSieve.java

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,8 @@ public void trigramPatterns(Annotation doc) {
9191
continue;
9292
}
9393
//VPQ
94-
if (lastPronounIndex == quoteBeginTokenIndex - 1 - offset
95-
&& docTokens.get(quoteBeginTokenIndex - 2 - offset).tag().startsWith("V")) {
94+
if (lastPronounIndex == quoteBeginTokenIndex - 1 - offset &&
95+
docTokens.get(quoteBeginTokenIndex - 2 - offset).tag().startsWith("V")) {
9696
fillInMention(quote, tokenRangeToString(lastPronounIndex), lastPronounIndex, lastPronounIndex, "trigram VPQ", PRONOUN);
9797
continue;
9898
}
@@ -106,17 +106,19 @@ public void trigramPatterns(Annotation doc) {
106106
ArrayList<String> names = namesAndNameIndices.first;
107107
ArrayList<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second;
108108

109-
if (names.size() > 0) {
109+
if (names.size() > 0 &&
110+
docTokens.size() > quoteEndTokenIndex + 1) {
110111
Pair<Integer, Integer> firstNameIndex = nameIndices.get(0);
111112
CoreLabel nextToken = docTokens.get(quoteEndTokenIndex + 1);
112113
//QVC
113-
if (nextToken.tag().startsWith("V") // verb!
114-
&& firstNameIndex.first.equals(quoteEndTokenIndex + 2)) {
114+
if (nextToken.tag().startsWith("V") && // verb!
115+
firstNameIndex.first.equals(quoteEndTokenIndex + 2)) {
115116
fillInMention(quote, names.get(0), firstNameIndex.first, firstNameIndex.second, "trigram QVC", NAME);
116117
continue;
117118
}
118119
//QCV
119-
if (firstNameIndex.first.equals(quoteEndTokenIndex + 1)) {
120+
if (firstNameIndex.first.equals(quoteEndTokenIndex + 1) &&
121+
docTokens.size() > firstNameIndex.second + 1) {
120122
CoreLabel secondNextToken = docTokens.get(firstNameIndex.second + 1);
121123
if(secondNextToken.tag().startsWith("V")) {
122124
fillInMention(quote, names.get(0), firstNameIndex.first, firstNameIndex.second, "trigram QCV", NAME);
@@ -126,7 +128,8 @@ public void trigramPatterns(Annotation doc) {
126128
}
127129

128130
ArrayList<Integer> pronounsIndices = scanForPronouns(followingTokenRange);
129-
if (pronounsIndices.size() > 0) {
131+
if (pronounsIndices.size() > 0 &&
132+
docTokens.size() > quoteEndTokenIndex + 1) {
130133
CoreLabel nextToken = docTokens.get(quoteEndTokenIndex + 1);
131134
int firstPronounIndex = pronounsIndices.get(0);
132135
//QVP
@@ -135,15 +138,15 @@ public void trigramPatterns(Annotation doc) {
135138
continue;
136139
}
137140
//QPV
138-
if (firstPronounIndex == quoteEndTokenIndex + 1
139-
&& docTokens.get(quoteEndTokenIndex + 2).tag().startsWith("V")) {
141+
if (firstPronounIndex == quoteEndTokenIndex + 1 &&
142+
docTokens.size() > quoteEndTokenIndex + 2 &&
143+
docTokens.get(quoteEndTokenIndex + 2).tag().startsWith("V")) {
140144
fillInMention(quote, tokenRangeToString(pronounsIndices.get(pronounsIndices.size() - 1)), firstPronounIndex,
141-
firstPronounIndex, "trigram QPV", PRONOUN);
145+
firstPronounIndex, "trigram QPV", PRONOUN);
142146
continue;
143147
}
144148
}
145149
}
146150
}
147151
}
148-
149152
}

0 commit comments

Comments
 (0)