diff --git a/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java b/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java index 2f05a34200..c05200e79c 100644 --- a/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java +++ b/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java @@ -19,8 +19,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.Text; -import org.apache.nutch.crawl.CrawlDatum; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java index 8ee957c095..d6272c5983 100644 --- a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java +++ b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java @@ -19,7 +19,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.FloatWritable; -import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Nutch; import org.apache.nutch.util.NutchConfiguration; import org.apache.commons.lang.StringUtils; diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java index 7e66ac2bb1..570b19d931 100644 --- a/src/java/org/apache/nutch/crawl/Generator.java +++ b/src/java/org/apache/nutch/crawl/Generator.java @@ -85,7 +85,7 @@ * fetchlists for several segments in one go. Unlike in the initial version * (OldGenerator), the IP resolution is done ONLY on the entries which have been * selected for fetching. The URLs are partitioned by IP, domain or host within - * a segment. We can chose separately how to count the URLS i.e. by domain or + * a segment. We can choose separately how to count the URLs i.e. by domain or * host to limit the entries. **/ public class Generator extends NutchTool implements Tool { diff --git a/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java index a181fbf0de..21022d46ef 100644 --- a/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java +++ b/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java @@ -25,7 +25,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; -import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.HttpHeaders; import org.apache.nutch.util.MimeUtil; import org.apache.nutch.util.NutchConfiguration; diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java index f48f6076a1..ebb28aadb7 100644 --- a/src/java/org/apache/nutch/fetcher/FetcherThread.java +++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java @@ -404,11 +404,6 @@ public void run() { switch (status.getCode()) { - case ProtocolStatus.WOULDBLOCK: - // retry ? - fetchQueues.addFetchItem(fit); - break; - case ProtocolStatus.SUCCESS: // got a page pstatus = output(fit.url, fit.datum, content, status, CrawlDatum.STATUS_FETCH_SUCCESS, fit.outlinkDepth); @@ -457,8 +452,8 @@ public void run() { context.getCounter("FetcherStatus", "AboveExceptionThresholdInQueue").increment(killedURLs); /* FALLTHROUGH */ + case ProtocolStatus.RETRY: // retry - case ProtocolStatus.BLOCKED: output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_RETRY); break; diff --git a/src/java/org/apache/nutch/indexer/NutchIndexAction.java b/src/java/org/apache/nutch/indexer/NutchIndexAction.java index 80016b7aa3..18d9621326 100644 --- a/src/java/org/apache/nutch/indexer/NutchIndexAction.java +++ b/src/java/org/apache/nutch/indexer/NutchIndexAction.java @@ -22,8 +22,6 @@ import org.apache.hadoop.io.Writable; -import org.apache.nutch.indexer.NutchDocument; - /** * A {@link NutchIndexAction} is the new unit of indexing holding the document * and action information. diff --git a/src/java/org/apache/nutch/service/NutchReader.java b/src/java/org/apache/nutch/service/NutchReader.java index 98d7141a99..8d77254244 100644 --- a/src/java/org/apache/nutch/service/NutchReader.java +++ b/src/java/org/apache/nutch/service/NutchReader.java @@ -25,14 +25,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public interface NutchReader { +public interface NutchReader { static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); public static final Configuration conf = NutchConfiguration.create(); - public List read(String path) throws FileNotFoundException; - public List head(String path, int nrows) throws FileNotFoundException; - public List slice(String path, int start, int end) throws FileNotFoundException; + public List read(String path) throws FileNotFoundException; + public List head(String path, int nrows) throws FileNotFoundException; + public List slice(String path, int start, int end) throws FileNotFoundException; public int count(String path) throws FileNotFoundException; } diff --git a/src/java/org/apache/nutch/service/impl/LinkReader.java b/src/java/org/apache/nutch/service/impl/LinkReader.java index f3e54a3cc5..59d84509a6 100644 --- a/src/java/org/apache/nutch/service/impl/LinkReader.java +++ b/src/java/org/apache/nutch/service/impl/LinkReader.java @@ -33,11 +33,11 @@ import org.apache.nutch.scoring.webgraph.LinkDatum; import org.apache.nutch.service.NutchReader; -public class LinkReader implements NutchReader{ +public class LinkReader implements NutchReader { @Override - public List read(String path) throws FileNotFoundException { - List rows= new ArrayList<>(); + public List> read(String path) throws FileNotFoundException { + List> rows= new ArrayList<>(); Path file = new Path(path); SequenceFile.Reader reader; try{ @@ -69,8 +69,8 @@ public List read(String path) throws FileNotFoundException { } @Override - public List head(String path, int nrows) throws FileNotFoundException { - List rows= new ArrayList<>(); + public List> head(String path, int nrows) throws FileNotFoundException { + List> rows= new ArrayList<>(); Path file = new Path(path); SequenceFile.Reader reader; try{ @@ -101,9 +101,9 @@ public List head(String path, int nrows) throws FileNotFoundException { } @Override - public List slice(String path, int start, int end) + public List> slice(String path, int start, int end) throws FileNotFoundException { - List rows= new ArrayList<>(); + List> rows= new ArrayList<>(); Path file = new Path(path); SequenceFile.Reader reader; try{ diff --git a/src/java/org/apache/nutch/service/impl/NodeReader.java b/src/java/org/apache/nutch/service/impl/NodeReader.java index 612fa264f3..efa94f2329 100644 --- a/src/java/org/apache/nutch/service/impl/NodeReader.java +++ b/src/java/org/apache/nutch/service/impl/NodeReader.java @@ -36,8 +36,8 @@ public class NodeReader implements NutchReader { @Override - public List read(String path) throws FileNotFoundException { - List rows= new ArrayList<>(); + public List> read(String path) throws FileNotFoundException { + List> rows= new ArrayList<>(); Path file = new Path(path); SequenceFile.Reader reader; try{ @@ -70,8 +70,8 @@ public List read(String path) throws FileNotFoundException { } @Override - public List head(String path, int nrows) throws FileNotFoundException { - List rows= new ArrayList<>(); + public List> head(String path, int nrows) throws FileNotFoundException { + List> rows= new ArrayList<>(); Path file = new Path(path); SequenceFile.Reader reader; try{ @@ -102,9 +102,9 @@ public List head(String path, int nrows) throws FileNotFoundException { } @Override - public List slice(String path, int start, int end) + public List> slice(String path, int start, int end) throws FileNotFoundException { - List rows= new ArrayList<>(); + List> rows= new ArrayList<>(); Path file = new Path(path); SequenceFile.Reader reader; try{ diff --git a/src/java/org/apache/nutch/util/EncodingDetector.java b/src/java/org/apache/nutch/util/EncodingDetector.java index cffebc4e76..d474b11b12 100644 --- a/src/java/org/apache/nutch/util/EncodingDetector.java +++ b/src/java/org/apache/nutch/util/EncodingDetector.java @@ -33,7 +33,6 @@ import org.apache.nutch.metadata.Metadata; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.Content; -import org.apache.nutch.util.NutchConfiguration; import com.ibm.icu.text.CharsetDetector; import com.ibm.icu.text.CharsetMatch; diff --git a/src/plugin/index-arbitrary/src/java/org/apache/nutch/indexer/arbitrary/ArbitraryIndexingFilter.java b/src/plugin/index-arbitrary/src/java/org/apache/nutch/indexer/arbitrary/ArbitraryIndexingFilter.java index 21a4537bf6..6fd6a3ac78 100644 --- a/src/plugin/index-arbitrary/src/java/org/apache/nutch/indexer/arbitrary/ArbitraryIndexingFilter.java +++ b/src/plugin/index-arbitrary/src/java/org/apache/nutch/indexer/arbitrary/ArbitraryIndexingFilter.java @@ -153,7 +153,7 @@ public class ArbitraryIndexingFilter implements IndexingFilter { public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException { - Class theClass = null; + Class theClass = null; Method theMethod = null; Constructor theConstructor = null; Object instance = null; diff --git a/src/plugin/index-arbitrary/src/test/org/apache/nutch/indexer/arbitrary/TestArbitraryIndexingFilter.java b/src/plugin/index-arbitrary/src/test/org/apache/nutch/indexer/arbitrary/TestArbitraryIndexingFilter.java index adaecf55c3..b8c92e168f 100644 --- a/src/plugin/index-arbitrary/src/test/org/apache/nutch/indexer/arbitrary/TestArbitraryIndexingFilter.java +++ b/src/plugin/index-arbitrary/src/test/org/apache/nutch/indexer/arbitrary/TestArbitraryIndexingFilter.java @@ -21,15 +21,11 @@ import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.Inlinks; import org.apache.nutch.indexer.NutchDocument; -import org.apache.nutch.indexer.NutchField; import org.apache.nutch.parse.ParseImpl; import org.apache.nutch.util.NutchConfiguration; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.lang.invoke.MethodHandles; /** * Tests that the index-arbitrary filter can add a new field with an arbitrary diff --git a/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java b/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java index 4bc317e6f5..3684c9907b 100644 --- a/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java +++ b/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java @@ -21,7 +21,6 @@ import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.Inlinks; import org.apache.nutch.indexer.NutchDocument; -import org.apache.nutch.indexer.basic.BasicIndexingFilter; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.parse.Outlink; import org.apache.nutch.parse.ParseData; @@ -94,6 +93,6 @@ public void testBasicIndexingFilter() throws Exception { Assert.assertEquals("test content", "this is a sample foo", doc.getField("content").getValues().get(0)); Assert.assertEquals("test fetch time", new Date(100L), - (Date) doc.getField("tstamp").getValues().get(0)); + doc.getField("tstamp").getValues().get(0)); } } diff --git a/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java b/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java index 397a310bd5..80d09e7d85 100644 --- a/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java +++ b/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java @@ -22,7 +22,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.parse.html.HtmlParser; import org.apache.nutch.parse.Outlink; import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.Parser; diff --git a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/BoilerpipeExtractorRepository.java b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/BoilerpipeExtractorRepository.java index 23e8ddb24e..03c4932878 100644 --- a/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/BoilerpipeExtractorRepository.java +++ b/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/BoilerpipeExtractorRepository.java @@ -41,7 +41,7 @@ public static synchronized BoilerpipeExtractor getExtractor(String boilerpipeExt // Attempt to load the class try { ClassLoader loader = BoilerpipeExtractor.class.getClassLoader(); - Class extractorClass = loader.loadClass(boilerpipeExtractorName); + Class extractorClass = loader.loadClass(boilerpipeExtractorName); // Add an instance to the repository extractorRepository.put(boilerpipeExtractorName, (BoilerpipeExtractor)extractorClass.getConstructor().newInstance()); diff --git a/src/plugin/parsefilter-debug/src/java/org/apache/nutch/parsefilter/debug/DebugParseFilter.java b/src/plugin/parsefilter-debug/src/java/org/apache/nutch/parsefilter/debug/DebugParseFilter.java index 691f8944ca..1371bebe62 100644 --- a/src/plugin/parsefilter-debug/src/java/org/apache/nutch/parsefilter/debug/DebugParseFilter.java +++ b/src/plugin/parsefilter-debug/src/java/org/apache/nutch/parsefilter/debug/DebugParseFilter.java @@ -17,13 +17,10 @@ package org.apache.nutch.parsefilter.debug; import java.io.ByteArrayOutputStream; -import java.io.OutputStreamWriter; import java.lang.invoke.MethodHandles; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import org.apache.hadoop.conf.Configuration; -import org.apache.nutch.metadata.Metadata; import org.apache.nutch.parse.HTMLMetaTags; import org.apache.nutch.parse.HtmlParseFilter; import org.apache.nutch.parse.Parse; diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java index 868e73822e..966ab6522c 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java @@ -21,6 +21,7 @@ import java.io.UnsupportedEncodingException; import java.net.CookieHandler; import java.net.CookieManager; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -132,7 +133,8 @@ public boolean getFollowRedirects() { LOG.debug("Response headers : " + header); } } - String rst = IOUtils.toString(post.getResponseBodyAsStream()); + String rst = IOUtils.toString(post.getResponseBodyAsStream(), + StandardCharsets.UTF_8); LOG.debug("login post result: " + rst); } finally { if (post != null) { @@ -194,7 +196,8 @@ private String httpGetPageContent(String url) throws IOException { if (cookieHeader != null) { setCookies(cookieHeader.getValue()); } - String rst = IOUtils.toString(get.getResponseBodyAsStream()); + String rst = IOUtils.toString(get.getResponseBodyAsStream(), + StandardCharsets.UTF_8); return rst; } finally { get.releaseConnection(); diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/Http.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/Http.java index e15ae118ee..59899b6bbf 100644 --- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/Http.java +++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/Http.java @@ -16,18 +16,16 @@ */ package org.apache.nutch.protocol.interactiveselenium; -import java.lang.invoke.MethodHandles; import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.net.URL; + import org.apache.hadoop.conf.Configuration; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.net.protocols.Response; -import org.apache.nutch.protocol.http.api.HttpBase; import org.apache.nutch.protocol.ProtocolException; +import org.apache.nutch.protocol.http.api.HttpBase; import org.apache.nutch.util.NutchConfiguration; - -import org.apache.nutch.protocol.interactiveselenium.HttpResponse; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java b/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java index 41895ea815..9d1e2ab277 100644 --- a/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java +++ b/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java @@ -16,8 +16,6 @@ */ package org.apache.nutch.scoring.link; -import java.util.List; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.nutch.crawl.CrawlDatum; diff --git a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java index 489491cf2d..26cbaa4c3e 100644 --- a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java +++ b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java @@ -17,21 +17,17 @@ package org.apache.nutch.scoring.metadata; import java.util.Collection; -import java.util.Map.Entry; import java.util.Iterator; -import java.util.List; +import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.Text; import org.apache.nutch.crawl.CrawlDatum; -import org.apache.nutch.crawl.Inlinks; -import org.apache.nutch.indexer.NutchDocument; import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseData; import org.apache.nutch.protocol.Content; -import org.apache.nutch.scoring.ScoringFilter; import org.apache.nutch.scoring.AbstractScoringFilter; +import org.apache.nutch.scoring.ScoringFilter; import org.apache.nutch.scoring.ScoringFilterException; @@ -48,7 +44,6 @@ public class MetadataScoringFilter extends AbstractScoringFilter { private static String[] datumMetadata; private static String[] contentMetadata; private static String[] parseMetadata; - private Configuration conf; /** * This will take the metadata that you have listed in your "scoring.parse.md" diff --git a/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java b/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java index 8683cecfd9..0112239586 100644 --- a/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java +++ b/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java @@ -24,7 +24,6 @@ import org.apache.nutch.scoring.ScoringFilterException; import org.apache.nutch.util.NutchConfiguration; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import java.util.HashMap; @@ -51,7 +50,7 @@ public void distributeScoreToOutlinks() throws ScoringFilterException { parseData.getParseMeta().add("parent",parentMD); parseData.getParseMeta().add("depth",depthMD); - HashMap targets = new HashMap(); + HashMap targets = new HashMap<>(); targets.put(new Text("https://nutch.apache.org/downloads.html"),new CrawlDatum()); targets.put(new Text("https://wiki.apache.org/nutch"),new CrawlDatum()); diff --git a/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java b/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java index 79e03b686a..00e8c644b1 100644 --- a/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java +++ b/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java @@ -133,7 +133,7 @@ public static CollectionManager getCollectionManager(Configuration conf) { * @return Named SubCollection (or null if not existing) */ public Subcollection getSubColection(final String id) { - return (Subcollection) collectionMap.get(id); + return collectionMap.get(id); } /** @@ -180,10 +180,10 @@ public Subcollection createSubCollection(final String id, final String name) { */ public List getSubCollections(final String url) { List collections = new ArrayList(); - final Iterator iterator = collectionMap.values().iterator(); + final Iterator iterator = collectionMap.values().iterator(); while (iterator.hasNext()) { - final Subcollection subCol = (Subcollection) iterator.next(); + final Subcollection subCol = iterator.next(); if (subCol.filter(url) != null) { collections.add(subCol); } @@ -200,7 +200,7 @@ public List getSubCollections(final String url) { * * @return All collections CollectionManager knows about */ - public Collection getAll() { + public Collection getAll() { return collectionMap.values(); } @@ -219,10 +219,10 @@ public void save() throws IOException { final Document doc = new DocumentImpl(); final Element collections = doc .createElement(Subcollection.TAG_COLLECTIONS); - final Iterator iterator = collectionMap.values().iterator(); + final Iterator iterator = collectionMap.values().iterator(); while (iterator.hasNext()) { - final Subcollection subCol = (Subcollection) iterator.next(); + final Subcollection subCol = iterator.next(); final Element collection = doc .createElement(Subcollection.TAG_COLLECTION); collections.appendChild(collection); diff --git a/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java b/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java index 2e6d695b55..a52285bded 100644 --- a/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java +++ b/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java @@ -16,7 +16,6 @@ */ package org.apache.nutch.urlfilter.validator; -import org.apache.nutch.urlfilter.validator.UrlValidator; import org.junit.Assert; import org.junit.Test; diff --git a/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java b/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java index f2b475a178..cec27760e3 100644 --- a/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java +++ b/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java @@ -36,7 +36,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.net.URLNormalizer; import org.apache.nutch.plugin.Extension; import org.apache.nutch.plugin.PluginRepository; diff --git a/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java b/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java index ce3128da90..5e5884ea20 100644 --- a/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java +++ b/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java @@ -31,7 +31,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.net.URLNormalizer; import org.apache.nutch.plugin.Extension; import org.apache.nutch.plugin.PluginRepository; diff --git a/src/test/org/apache/nutch/crawl/ContinuousCrawlTestUtil.java b/src/test/org/apache/nutch/crawl/ContinuousCrawlTestUtil.java index 5d36fe9a3c..66fa5de540 100644 --- a/src/test/org/apache/nutch/crawl/ContinuousCrawlTestUtil.java +++ b/src/test/org/apache/nutch/crawl/ContinuousCrawlTestUtil.java @@ -28,7 +28,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.nutch.crawl.CrawlDbUpdateUtil; import org.apache.nutch.protocol.Content; import org.apache.nutch.util.TimingUtil; import org.slf4j.Logger; diff --git a/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java b/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java index 0fd094ece3..1beab362be 100644 --- a/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java +++ b/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java @@ -347,7 +347,6 @@ public Path getWorkingDirectory() throws IOException { * list of input CrawlDatums * @return list of resulting CrawlDatum(s) in CrawlDb */ - @SuppressWarnings("unchecked") public List update(List values) { if (values == null || values.size() == 0) { return new ArrayList(0); @@ -355,8 +354,8 @@ public List update(List values) { Collections.shuffle(values); // sorting of values should have no influence DummyContext context = new DummyContext(); try { - Iterable iterable_values = (Iterable)values; - reducer.reduce(dummyURL, iterable_values, (Reducer.Context) context); + Iterable iterable_values = values; + reducer.reduce(dummyURL, iterable_values, context); } catch (IOException e) { LOG.error(StringUtils.stringifyException(e)); } catch (InterruptedException e) { diff --git a/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java b/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java index 375e331254..1c0d0a7f96 100644 --- a/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java +++ b/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java @@ -26,8 +26,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; -import org.apache.nutch.crawl.CrawlDatum; - import static org.apache.nutch.crawl.CrawlDatum.*; import org.apache.nutch.scoring.ScoringFilterException; @@ -103,9 +101,9 @@ public void testCrawlDbStateTransitionMatrix() { LOG.info("Test CrawlDatum state transitions"); Reducer.Context context = CrawlDBTestUtil.createContext(); Configuration conf = context.getConfiguration(); - CrawlDbUpdateUtil updateDb = null; + CrawlDbUpdateUtil updateDb = null; try { - updateDb = new CrawlDbUpdateUtil( + updateDb = new CrawlDbUpdateUtil<>( new CrawlDbReducer(), context); } catch (IOException e) { e.printStackTrace(); diff --git a/src/test/org/apache/nutch/fetcher/TestFetcher.java b/src/test/org/apache/nutch/fetcher/TestFetcher.java index ecc135c52a..19e4104b27 100644 --- a/src/test/org/apache/nutch/fetcher/TestFetcher.java +++ b/src/test/org/apache/nutch/fetcher/TestFetcher.java @@ -100,7 +100,7 @@ public void testFetch() throws IOException, ClassNotFoundException, InterruptedE // generate Generator g = new Generator(conf); Path[] generatedSegment = g.generate(crawldbPath, segmentsPath, 1, - Long.MAX_VALUE, Long.MAX_VALUE, false, false); + Long.MAX_VALUE, Long.MAX_VALUE, false, false, false, 1, null); long time = System.currentTimeMillis(); // fetch @@ -121,7 +121,6 @@ public void testFetch() throws IOException, ClassNotFoundException, InterruptedE // verify content Path content = new Path(new Path(generatedSegment[0], Content.DIR_NAME), "part-r-00000/data"); - @SuppressWarnings("resource") SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(content)); ArrayList handledurls = new ArrayList(); @@ -171,6 +170,8 @@ public void testFetch() throws IOException, ClassNotFoundException, InterruptedE } } while (true); + reader.close(); + Collections.sort(handledurls); Assert.assertEquals(urls.size(), handledurls.size()); diff --git a/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java b/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java index a0515c3d48..218e4fc460 100644 --- a/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java +++ b/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java @@ -16,8 +16,6 @@ */ package org.apache.nutch.parse; -import org.apache.nutch.parse.Outlink; -import org.apache.nutch.parse.OutlinkExtractor; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.util.NutchConfiguration; import org.junit.Assert; diff --git a/src/test/org/apache/nutch/protocol/TestProtocolFactory.java b/src/test/org/apache/nutch/protocol/TestProtocolFactory.java index 7cab6232cf..2266b084e9 100644 --- a/src/test/org/apache/nutch/protocol/TestProtocolFactory.java +++ b/src/test/org/apache/nutch/protocol/TestProtocolFactory.java @@ -18,7 +18,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.nutch.util.NutchConfiguration; -import org.apache.nutch.util.ObjectCache; import org.junit.Assert; import org.junit.Before; import org.junit.Test; diff --git a/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java b/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java index d49b993102..547007aea3 100644 --- a/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java +++ b/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java @@ -16,22 +16,15 @@ */ package org.apache.nutch.tools; -//Junit imports -import static org.junit.Assert.*; -import org.junit.Test; - -//Commons imports -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.filefilter.FileFilterUtils; +import static org.junit.Assert.assertTrue; -//JDK imports import java.io.File; import java.nio.file.Files; import java.util.Collection; -//Nutch imports -import org.apache.nutch.tools.CommonCrawlDataDumper; -import org.apache.nutch.tools.CommonCrawlConfig; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.filefilter.FileFilterUtils; +import org.junit.Test; /** * diff --git a/src/test/org/apache/nutch/util/TestTableUtil.java b/src/test/org/apache/nutch/util/TestTableUtil.java index 1f5512fa1d..d31acdb13d 100644 --- a/src/test/org/apache/nutch/util/TestTableUtil.java +++ b/src/test/org/apache/nutch/util/TestTableUtil.java @@ -16,7 +16,6 @@ */ package org.apache.nutch.util; -import org.apache.nutch.util.TableUtil; import org.junit.Test; import static org.junit.Assert.*;