Skip to content

Commit b84d443

Browse files
authored
Merge pull request #328 from morokosi/fix-matchesregexdeciderule
Fix match result is always false in MatchesListRegexDecideRule
2 parents: 7764f92 + 13ba0b2; commit: b84d443

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

modules/src/main/java/org/archive/modules/deciderules/MatchesListRegexDecideRule.java

+1 -2
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020

2121
import java.util.ArrayList;
2222
import java.util.List;
23-
import java.util.concurrent.CompletableFuture;
2423
import java.util.concurrent.ExecutionException;
2524
import java.util.concurrent.ForkJoinPool;
2625
import java.util.concurrent.FutureTask;
@@ -118,7 +117,7 @@ protected boolean evaluate(CrawlURI uri) {
118117
FutureTask<Boolean> matchesFuture = new FutureTask<>(() -> p.matcher(interruptible).matches());
119118
ForkJoinPool.commonPool().submit(matchesFuture);
120119
try {
121-
matchesFuture.get(getTimeoutPerRegexSeconds(), TimeUnit.SECONDS);
120+
matches = matchesFuture.get(getTimeoutPerRegexSeconds(), TimeUnit.SECONDS);
122121
} catch (TimeoutException e) {
123122
matchesFuture.cancel(true);
124123
logger.warning("Timed out after " + getTimeoutPerRegexSeconds() + " seconds waiting for '" + p + "' to match.");

modules/src/test/java/org/archive/modules/deciderules/MatchesListRegexDecideRuleTest.java

+17 -1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
package org.archive.modules.deciderules;
22

3-
import com.google.common.annotations.VisibleForTesting;
43
import junit.framework.TestCase;
54
import org.apache.commons.httpclient.URIException;
65
import org.archive.modules.CrawlURI;
@@ -32,4 +31,21 @@ public void testEvaluate() throws URIException {
3231
final DecideResult decideResult = rule.decisionFor(curi);
3332
assertEquals("Expected NONE not " + decideResult , DecideResult.NONE, decideResult);
3433
}
34+
35+
public void testEvaluateInTime() throws URIException {
36+
final String regex = "http://www\\.netarkivet\\.dk/x+";
37+
String seed = "http://www.netarkivet.dk/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
38+
MatchesListRegexDecideRule rule = new MatchesListRegexDecideRule();
39+
List<Pattern> patternList = new ArrayList<>();
40+
patternList.add(Pattern.compile(regex));
41+
rule.setRegexList(patternList);
42+
rule.setEnabled(true);
43+
rule.setListLogicalOr(true);
44+
rule.setDecision(DecideResult.REJECT);
45+
rule.setTimeoutPerRegexSeconds(2);
46+
final CrawlURI curi = new CrawlURI(UURIFactory.getInstance(seed));
47+
final DecideResult decideResult = rule.decisionFor(curi);
48+
assertEquals("Expected REJECT not " + decideResult , DecideResult.REJECT, decideResult);
49+
}
50+
3551
}

0 commit comments

Comments
 (0)