Skip to content

Commit

Permalink
NUTCH-3064 Upgrade com.maxmind.geoip2:geoip2 dependency in geoip-index to v4.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
lewismc committed Sep 26, 2024
1 parent 48840f0 commit 28eb772
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
*/
package org.apache.nutch.indexer.geoip;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -67,6 +68,8 @@ public class GeoIPIndexingFilter implements IndexingFilter {
private Configuration conf;
private String usage;
private static final String INSIGHTS_SERVICE = "insights";
private static final List<String> DB_TYPES = Arrays.asList(
"anonymous", "asn", "city", "connection", "domain", "isp");
private WebServiceClient client;
private DatabaseReader reader;

Expand Down Expand Up @@ -99,39 +102,37 @@ public void setConf(Configuration config) {
usage = config.get("index.geoip.usage");
if (usage != null && usage.equalsIgnoreCase(INSIGHTS_SERVICE)) {
client = new WebServiceClient.Builder(
Integer.parseInt(config.get("index.geoip.userid")),
config.get("index.geoip.licensekey")).build();
Integer.parseInt(config.get("index.geoip.userid")),
config.get("index.geoip.licensekey")).build();
LOG.debug("Established geoip-index InsightsService client.");
} else if (usage != null && !usage.equalsIgnoreCase(INSIGHTS_SERVICE)) {
} else if (usage != null && DB_TYPES.contains(usage.toLowerCase())) {
String dbFile = config.get("index.geoip.db.file");
if (dbFile != null) {
LOG.debug("GeoIP db file: {}", dbFile);
URL dbFileUrl = config.getResource(dbFile);
if (dbFileUrl == null) {
LOG.error("Db file {} not found on classpath", dbFile);
InputStream db = config.getConfResourceAsInputStream(dbFile);
if (db == null) {
LOG.error("GeoIP DB file {} not found on classpath", dbFile);
} else {
try {
buildDb(new File(dbFileUrl.getFile()));
} catch (Exception e) {
LOG.error("Failed to read Db file: {} {}", dbFile, e.getMessage());
}
buildDb(db, dbFile);
}
}
} else {
LOG.warn("Error processing index-geoip plugin configuration.");
}
}

/*
/**
* Build the Database and
* <a href="https://github.com/maxmind/GeoIP2-java/tree/main?tab=readme-ov-file#caching">
* associated cache</a>.
* @param geoDb the GeoIP2 database to be used for IP lookups.
* @param db an {@link InputStream} representing the GeoIP2 DB to be used for IP lookups.
* @param dbFile the GeoIP DB file name
*/
private void buildDb(File geoDb) {
private void buildDb(InputStream db, String dbFile) {
try {
LOG.info("Reading index-geoip Db file: {}", geoDb);
reader = Objects.requireNonNull(new DatabaseReader.Builder(geoDb).withCache(new CHMCache()).build());
reader = Objects.requireNonNull(new DatabaseReader.Builder(db).withCache(new CHMCache()).build());
LOG.info("Built in-memory GeoIP lookup DB from file: {}", db);
} catch (IOException | NullPointerException e) {
LOG.error("Failed to build Db: {}", e.getMessage());
LOG.error("Failed to read Db file: {} {}", dbFile, e.getMessage());
}
}

Expand Down Expand Up @@ -179,8 +180,8 @@ private NutchDocument augmentNutchDocWithIPData(NutchDocument doc, ParseData dat
LOG.error("Failed to determine 'index.geoip.usage' value: {}", usage);
}
} catch (IOException | GeoIp2Exception e) {
LOG.error("Error creating index-geoip fields _ip_: {}, databe type: {} \n{}",
serverIp, reader.getMetadata().getDatabaseType(), e.getMessage());
LOG.error("Error creating index-geoip fields _ip_: {}, databe type: {} \n{}",
serverIp, reader.getMetadata().getDatabaseType(), e.getMessage());
}
}
return doc;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
import org.junit.Test;

/**
*
* Tests for {@link org.apache.nutch.indexer.geoip.GeoIPIndexingFilter}
*/
public class TestGeoIPIndexingFilter {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import org.apache.nutch.net.URLFilter;

/**
* JUnit based test of class <code>RegexURLFilterBase</code>.
* Base test class for {@link org.apache.nutch.urlfilter.api.RegexURLFilterBase}
*
* @author J&eacute;r&ocirc;me Charron
*/
Expand All @@ -43,14 +43,14 @@ public abstract class RegexURLFilterBaseTest {
private static final Logger LOG = LoggerFactory
.getLogger(MethodHandles.lookup().lookupClass());

protected final static String SEPARATOR = System.getProperty("file.separator");
protected final static String SAMPLES = System.getProperty("test.data", ".");
protected static final String SEPARATOR = System.getProperty("file.separator");
protected static final String SAMPLES = System.getProperty("test.data", ".");

protected abstract URLFilter getURLFilter(Reader rules);

protected void bench(int loops, String file) {
try {
bench(loops, new FileReader(SAMPLES + SEPARATOR + file + ".rules"),
try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + file + ".rules");) {
bench(loops, rulesReader,
new FileReader(SAMPLES + SEPARATOR + file + ".urls"));
} catch (Exception e) {
Assert.fail(e.toString());
Expand All @@ -74,27 +74,27 @@ protected void bench(int loops, Reader rules, Reader urls) {
}

protected void bench(int loops, String rulesFile, String urlsFile) {
try {
bench(loops, new FileReader(SAMPLES + SEPARATOR + rulesFile),
try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + rulesFile);) {
bench(loops, rulesReader,
new FileReader(SAMPLES + SEPARATOR + urlsFile));
} catch (Exception e) {
Assert.fail(e.toString());
}
}

protected void test(String rulesFile, String urlsFile) {
try {
test(new FileReader(SAMPLES + SEPARATOR + rulesFile),
new FileReader(SAMPLES + SEPARATOR + urlsFile));
try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + rulesFile);
FileReader urlsReader = new FileReader(SAMPLES + SEPARATOR + urlsFile);) {
test(rulesReader, urlsReader);
} catch (Exception e) {
Assert.fail(e.toString());
}
}

protected void test(String file) {
try {
test(new FileReader(SAMPLES + SEPARATOR + file + ".rules"),
new FileReader(SAMPLES + SEPARATOR + file + ".urls"));
try (FileReader rulesReader = new FileReader(SAMPLES + SEPARATOR + file + ".rules");
FileReader urlsReader = new FileReader(SAMPLES + SEPARATOR + file + ".urls");) {
test(rulesReader, urlsReader);
} catch (Exception e) {
Assert.fail(e.toString());
}
Expand All @@ -121,14 +121,14 @@ protected void test(URLFilter filter, FilteredURL[] expected) {

private static FilteredURL[] readURLFile(Reader reader) throws IOException {
BufferedReader in = new BufferedReader(reader);
List<FilteredURL> list = new ArrayList<FilteredURL>();
List<FilteredURL> list = new ArrayList<>();
String line;
while ((line = in.readLine()) != null) {
if (line.length() != 0) {
list.add(new FilteredURL(line));
}
}
return (FilteredURL[]) list.toArray(new FilteredURL[list.size()]);
return list.toArray(new FilteredURL[list.size()]);
}

private static class FilteredURL {
Expand Down

0 comments on commit 28eb772

Please sign in to comment.