diff --git a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java index eea14fc12..0b551d748 100644 --- a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java +++ b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java @@ -16,10 +16,11 @@ */ package org.apache.nutch.indexer.geoip; -import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.lang.invoke.MethodHandles; -import java.net.URL; +import java.util.Arrays; +import java.util.List; import java.util.Objects; import org.apache.hadoop.conf.Configuration; @@ -67,6 +68,8 @@ public class GeoIPIndexingFilter implements IndexingFilter { private Configuration conf; private String usage; private static final String INSIGHTS_SERVICE = "insights"; + private static final List DB_TYPES = Arrays.asList( + "anonymous", "asn", "city", "connection", "domain", "isp"); private WebServiceClient client; private DatabaseReader reader; @@ -99,39 +102,37 @@ public void setConf(Configuration config) { usage = config.get("index.geoip.usage"); if (usage != null && usage.equalsIgnoreCase(INSIGHTS_SERVICE)) { client = new WebServiceClient.Builder( - Integer.parseInt(config.get("index.geoip.userid")), - config.get("index.geoip.licensekey")).build(); + Integer.parseInt(config.get("index.geoip.userid")), + config.get("index.geoip.licensekey")).build(); LOG.debug("Established geoip-index InsightsService client."); - } else if (usage != null && !usage.equalsIgnoreCase(INSIGHTS_SERVICE)) { + } else if (usage != null && DB_TYPES.contains(usage.toLowerCase())) { String dbFile = config.get("index.geoip.db.file"); if (dbFile != null) { - LOG.debug("GeoIP db file: {}", dbFile); - URL dbFileUrl = config.getResource(dbFile); - if (dbFileUrl == null) { - LOG.error("Db file {} not found on classpath", dbFile); + InputStream db = config.getConfResourceAsInputStream(dbFile); + if (db == null) { + LOG.error("GeoIP DB file {} not found on classpath", dbFile); } else { - try { - buildDb(new File(dbFileUrl.getFile())); - } catch (Exception e) { - LOG.error("Failed to read Db file: {} {}", dbFile, e.getMessage()); - } + buildDb(db, dbFile); } } + } else { + LOG.warn("Error processing index-geoip plugin configuration."); } } - /* + /** * Build the Database and * * associated cache. - * @param geoDb the GeoIP2 database to be used for IP lookups. + * @param db an {@link InputStream} representing the GeoIP2 DB to be used for IP lookups. + * @param dbFile the GeoIP DB file name */ - private void buildDb(File geoDb) { + private void buildDb(InputStream db, String dbFile) { try { - LOG.info("Reading index-geoip Db file: {}", geoDb); - reader = Objects.requireNonNull(new DatabaseReader.Builder(geoDb).withCache(new CHMCache()).build()); + reader = Objects.requireNonNull(new DatabaseReader.Builder(db).withCache(new CHMCache()).build()); + LOG.info("Built in-memory GeoIP lookup DB from file: {}", db); } catch (IOException | NullPointerException e) { - LOG.error("Failed to build Db: {}", e.getMessage()); + LOG.error("Failed to read Db file: {} {}", dbFile, e.getMessage()); } } @@ -179,8 +180,8 @@ private NutchDocument augmentNutchDocWithIPData(NutchDocument doc, ParseData dat LOG.error("Failed to determine 'index.geoip.usage' value: {}", usage); } } catch (IOException | GeoIp2Exception e) { - LOG.error("Error creating index-geoip fields _ip_: {}, databe type: {} \n{}", - serverIp, reader.getMetadata().getDatabaseType(), e.getMessage()); + LOG.error("Error creating index-geoip fields _ip_: {}, databe type: {} \n{}", + serverIp, reader.getMetadata().getDatabaseType(), e.getMessage()); } } return doc; diff --git a/src/plugin/index-geoip/src/test/org/apache/nutch/indexer/geoip/TestGeoIPIndexingFilter.java b/src/plugin/index-geoip/src/test/org/apache/nutch/indexer/geoip/TestGeoIPIndexingFilter.java index 9e2c52ed9..ee8331fb4 100644 --- a/src/plugin/index-geoip/src/test/org/apache/nutch/indexer/geoip/TestGeoIPIndexingFilter.java +++ b/src/plugin/index-geoip/src/test/org/apache/nutch/indexer/geoip/TestGeoIPIndexingFilter.java @@ -42,7 +42,7 @@ import org.junit.Test; /** - * + * Tests for {@link org.apache.nutch.indexer.geoip.GeoIPIndexingFilter} */ public class TestGeoIPIndexingFilter { diff --git a/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java b/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java index 080b2e587..48784e09b 100644 --- a/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java +++ b/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java @@ -33,7 +33,7 @@ import org.apache.nutch.net.URLFilter; /** - * JUnit based test of class RegexURLFilterBase. + * Base test class for {@link org.apache.nutch.urlfilter.api.RegexURLFilterBase} * * @author Jérôme Charron */ @@ -43,14 +43,14 @@ public abstract class RegexURLFilterBaseTest { private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); - protected final static String SEPARATOR = System.getProperty("file.separator"); - protected final static String SAMPLES = System.getProperty("test.data", "."); + protected static final String SEPARATOR = System.getProperty("file.separator"); + protected static final String SAMPLES = System.getProperty("test.data", "."); protected abstract URLFilter getURLFilter(Reader rules); protected void bench(int loops, String file) { - try { - bench(loops, new FileReader(SAMPLES + SEPARATOR + file + ".rules"), + try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + file + ".rules");) { + bench(loops, rulesReader, new FileReader(SAMPLES + SEPARATOR + file + ".urls")); } catch (Exception e) { Assert.fail(e.toString()); @@ -74,8 +74,8 @@ protected void bench(int loops, Reader rules, Reader urls) { } protected void bench(int loops, String rulesFile, String urlsFile) { - try { - bench(loops, new FileReader(SAMPLES + SEPARATOR + rulesFile), + try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + rulesFile);) { + bench(loops, rulesReader, new FileReader(SAMPLES + SEPARATOR + urlsFile)); } catch (Exception e) { Assert.fail(e.toString()); @@ -83,18 +83,18 @@ protected void bench(int loops, String rulesFile, String urlsFile) { } protected void test(String rulesFile, String urlsFile) { - try { - test(new FileReader(SAMPLES + SEPARATOR + rulesFile), - new FileReader(SAMPLES + SEPARATOR + urlsFile)); + try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + rulesFile); + FileReader urlsReader = new FileReader(SAMPLES + SEPARATOR + urlsFile);) { + test(rulesReader, urlsReader); } catch (Exception e) { Assert.fail(e.toString()); } } protected void test(String file) { - try { - test(new FileReader(SAMPLES + SEPARATOR + file + ".rules"), - new FileReader(SAMPLES + SEPARATOR + file + ".urls")); + try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + file + ".rules"); + FileReader urlsReader = new FileReader(SAMPLES + SEPARATOR + file + ".urls");) { + test(rulesReader, urlsReader); } catch (Exception e) { Assert.fail(e.toString()); } @@ -121,14 +121,14 @@ protected void test(URLFilter filter, FilteredURL[] expected) { private static FilteredURL[] readURLFile(Reader reader) throws IOException { BufferedReader in = new BufferedReader(reader); - List list = new ArrayList(); + List list = new ArrayList<>(); String line; while ((line = in.readLine()) != null) { if (line.length() != 0) { list.add(new FilteredURL(line)); } } - return (FilteredURL[]) list.toArray(new FilteredURL[list.size()]); + return list.toArray(new FilteredURL[list.size()]); } private static class FilteredURL {