Skip to content

Commit

Permalink
Merge pull request #828 from sebastian-nagel/NUTCH-3073
Browse files Browse the repository at this point in the history
NUTCH-3073 Address Java compiler warnings
  • Loading branch information
sebastian-nagel authored Oct 6, 2024
2 parents d6f55b8 + e678777 commit 4a61208
Show file tree
Hide file tree
Showing 33 changed files with 50 additions and 95 deletions.
2 changes: 0 additions & 2 deletions src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down
1 change: 0 additions & 1 deletion src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.FloatWritable;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.commons.lang.StringUtils;
Expand Down
2 changes: 1 addition & 1 deletion src/java/org/apache/nutch/crawl/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
* fetchlists for several segments in one go. Unlike in the initial version
* (OldGenerator), the IP resolution is done ONLY on the entries which have been
* selected for fetching. The URLs are partitioned by IP, domain or host within
* a segment. We can chose separately how to count the URLS i.e. by domain or
* a segment. We can choose separately how to count the URLs i.e. by domain or
* host to limit the entries.
**/
public class Generator extends NutchTool implements Tool {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.HttpHeaders;
import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;
Expand Down
7 changes: 1 addition & 6 deletions src/java/org/apache/nutch/fetcher/FetcherThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -404,11 +404,6 @@ public void run() {

switch (status.getCode()) {

case ProtocolStatus.WOULDBLOCK:
// retry ?
fetchQueues.addFetchItem(fit);
break;

case ProtocolStatus.SUCCESS: // got a page
pstatus = output(fit.url, fit.datum, content, status,
CrawlDatum.STATUS_FETCH_SUCCESS, fit.outlinkDepth);
Expand Down Expand Up @@ -457,8 +452,8 @@ public void run() {
context.getCounter("FetcherStatus",
"AboveExceptionThresholdInQueue").increment(killedURLs);
/* FALLTHROUGH */

case ProtocolStatus.RETRY: // retry
case ProtocolStatus.BLOCKED:
output(fit.url, fit.datum, null, status,
CrawlDatum.STATUS_FETCH_RETRY);
break;
Expand Down
2 changes: 0 additions & 2 deletions src/java/org/apache/nutch/indexer/NutchIndexAction.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

import org.apache.hadoop.io.Writable;

import org.apache.nutch.indexer.NutchDocument;

/**
* A {@link NutchIndexAction} is the new unit of indexing holding the document
* and action information.
Expand Down
8 changes: 4 additions & 4 deletions src/java/org/apache/nutch/service/NutchReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public interface NutchReader {
public interface NutchReader {

static final Logger LOG = LoggerFactory
.getLogger(MethodHandles.lookup().lookupClass());
public static final Configuration conf = NutchConfiguration.create();

public List read(String path) throws FileNotFoundException;
public List head(String path, int nrows) throws FileNotFoundException;
public List slice(String path, int start, int end) throws FileNotFoundException;
public List<?> read(String path) throws FileNotFoundException;
public List<?> head(String path, int nrows) throws FileNotFoundException;
public List<?> slice(String path, int start, int end) throws FileNotFoundException;
public int count(String path) throws FileNotFoundException;
}
14 changes: 7 additions & 7 deletions src/java/org/apache/nutch/service/impl/LinkReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@
import org.apache.nutch.scoring.webgraph.LinkDatum;
import org.apache.nutch.service.NutchReader;

public class LinkReader implements NutchReader{
public class LinkReader implements NutchReader {

@Override
public List read(String path) throws FileNotFoundException {
List<HashMap> rows= new ArrayList<>();
public List<HashMap<String, String>> read(String path) throws FileNotFoundException {
List<HashMap<String, String>> rows= new ArrayList<>();
Path file = new Path(path);
SequenceFile.Reader reader;
try{
Expand Down Expand Up @@ -69,8 +69,8 @@ public List read(String path) throws FileNotFoundException {
}

@Override
public List head(String path, int nrows) throws FileNotFoundException {
List<HashMap> rows= new ArrayList<>();
public List<HashMap<String, String>> head(String path, int nrows) throws FileNotFoundException {
List<HashMap<String, String>> rows= new ArrayList<>();
Path file = new Path(path);
SequenceFile.Reader reader;
try{
Expand Down Expand Up @@ -101,9 +101,9 @@ public List head(String path, int nrows) throws FileNotFoundException {
}

@Override
public List slice(String path, int start, int end)
public List<HashMap<String, String>> slice(String path, int start, int end)
throws FileNotFoundException {
List<HashMap> rows= new ArrayList<>();
List<HashMap<String, String>> rows= new ArrayList<>();
Path file = new Path(path);
SequenceFile.Reader reader;
try{
Expand Down
12 changes: 6 additions & 6 deletions src/java/org/apache/nutch/service/impl/NodeReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
public class NodeReader implements NutchReader {

@Override
public List read(String path) throws FileNotFoundException {
List<HashMap> rows= new ArrayList<>();
public List<HashMap<String, String>> read(String path) throws FileNotFoundException {
List<HashMap<String, String>> rows= new ArrayList<>();
Path file = new Path(path);
SequenceFile.Reader reader;
try{
Expand Down Expand Up @@ -70,8 +70,8 @@ public List read(String path) throws FileNotFoundException {
}

@Override
public List head(String path, int nrows) throws FileNotFoundException {
List<HashMap> rows= new ArrayList<>();
public List<HashMap<String, String>> head(String path, int nrows) throws FileNotFoundException {
List<HashMap<String, String>> rows= new ArrayList<>();
Path file = new Path(path);
SequenceFile.Reader reader;
try{
Expand Down Expand Up @@ -102,9 +102,9 @@ public List head(String path, int nrows) throws FileNotFoundException {
}

@Override
public List slice(String path, int start, int end)
public List<HashMap<String, String>> slice(String path, int start, int end)
throws FileNotFoundException {
List<HashMap> rows= new ArrayList<>();
List<HashMap<String, String>> rows= new ArrayList<>();
Path file = new Path(path);
SequenceFile.Reader reader;
try{
Expand Down
1 change: 0 additions & 1 deletion src/java/org/apache/nutch/util/EncodingDetector.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.NutchConfiguration;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public class ArbitraryIndexingFilter implements IndexingFilter {
public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
CrawlDatum datum, Inlinks inlinks) throws IndexingException {

Class theClass = null;
Class<?> theClass = null;
Method theMethod = null;
Constructor<?> theConstructor = null;
Object instance = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,11 @@
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.indexer.NutchField;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.util.NutchConfiguration;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;

/**
* Tests that the index-arbitrary filter can add a new field with an arbitrary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.indexer.basic.BasicIndexingFilter;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.ParseData;
Expand Down Expand Up @@ -94,6 +93,6 @@ public void testBasicIndexingFilter() throws Exception {
Assert.assertEquals("test content", "this is a sample foo",
doc.getField("content").getValues().get(0));
Assert.assertEquals("test fetch time", new Date(100L),
(Date) doc.getField("tstamp").getValues().get(0));
doc.getField("tstamp").getValues().get(0));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.html.HtmlParser;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.Parser;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public static synchronized BoilerpipeExtractor getExtractor(String boilerpipeExt
// Attempt to load the class
try {
ClassLoader loader = BoilerpipeExtractor.class.getClassLoader();
Class extractorClass = loader.loadClass(boilerpipeExtractorName);
Class<?> extractorClass = loader.loadClass(boilerpipeExtractorName);

// Add an instance to the repository
extractorRepository.put(boilerpipeExtractorName, (BoilerpipeExtractor)extractorClass.getConstructor().newInstance());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,10 @@
package org.apache.nutch.parsefilter.debug;

import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutch.parse.HtmlParseFilter;
import org.apache.nutch.parse.Parse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.UnsupportedEncodingException;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -132,7 +133,8 @@ public boolean getFollowRedirects() {
LOG.debug("Response headers : " + header);
}
}
String rst = IOUtils.toString(post.getResponseBodyAsStream());
String rst = IOUtils.toString(post.getResponseBodyAsStream(),
StandardCharsets.UTF_8);
LOG.debug("login post result: " + rst);
} finally {
if (post != null) {
Expand Down Expand Up @@ -194,7 +196,8 @@ private String httpGetPageContent(String url) throws IOException {
if (cookieHeader != null) {
setCookies(cookieHeader.getValue());
}
String rst = IOUtils.toString(get.getResponseBodyAsStream());
String rst = IOUtils.toString(get.getResponseBodyAsStream(),
StandardCharsets.UTF_8);
return rst;
} finally {
get.releaseConnection();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,16 @@
*/
package org.apache.nutch.protocol.interactiveselenium;

import java.lang.invoke.MethodHandles;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URL;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.http.api.HttpBase;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.http.api.HttpBase;
import org.apache.nutch.util.NutchConfiguration;

import org.apache.nutch.protocol.interactiveselenium.HttpResponse;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
*/
package org.apache.nutch.scoring.link;

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,17 @@
package org.apache.nutch.scoring.metadata;

import java.util.Collection;
import java.util.Map.Entry;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.AbstractScoringFilter;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;


Expand All @@ -48,7 +44,6 @@ public class MetadataScoringFilter extends AbstractScoringFilter {
private static String[] datumMetadata;
private static String[] contentMetadata;
private static String[] parseMetadata;
private Configuration conf;

/**
* This will take the metadata that you have listed in your "scoring.parse.md"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.util.NutchConfiguration;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashMap;
Expand All @@ -51,7 +50,7 @@ public void distributeScoreToOutlinks() throws ScoringFilterException {
parseData.getParseMeta().add("parent",parentMD);
parseData.getParseMeta().add("depth",depthMD);

HashMap<Text,CrawlDatum> targets = new HashMap();
HashMap<Text,CrawlDatum> targets = new HashMap<>();
targets.put(new Text("https://nutch.apache.org/downloads.html"),new CrawlDatum());
targets.put(new Text("https://wiki.apache.org/nutch"),new CrawlDatum());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public static CollectionManager getCollectionManager(Configuration conf) {
* @return Named SubCollection (or null if not existing)
*/
public Subcollection getSubColection(final String id) {
return (Subcollection) collectionMap.get(id);
return collectionMap.get(id);
}

/**
Expand Down Expand Up @@ -180,10 +180,10 @@ public Subcollection createSubCollection(final String id, final String name) {
*/
public List<Subcollection> getSubCollections(final String url) {
List<Subcollection> collections = new ArrayList<Subcollection>();
final Iterator iterator = collectionMap.values().iterator();
final Iterator<Subcollection> iterator = collectionMap.values().iterator();

while (iterator.hasNext()) {
final Subcollection subCol = (Subcollection) iterator.next();
final Subcollection subCol = iterator.next();
if (subCol.filter(url) != null) {
collections.add(subCol);
}
Expand All @@ -200,7 +200,7 @@ public List<Subcollection> getSubCollections(final String url) {
*
* @return All collections CollectionManager knows about
*/
public Collection getAll() {
public Collection<Subcollection> getAll() {
return collectionMap.values();
}

Expand All @@ -219,10 +219,10 @@ public void save() throws IOException {
final Document doc = new DocumentImpl();
final Element collections = doc
.createElement(Subcollection.TAG_COLLECTIONS);
final Iterator iterator = collectionMap.values().iterator();
final Iterator<Subcollection> iterator = collectionMap.values().iterator();

while (iterator.hasNext()) {
final Subcollection subCol = (Subcollection) iterator.next();
final Subcollection subCol = iterator.next();
final Element collection = doc
.createElement(Subcollection.TAG_COLLECTION);
collections.appendChild(collection);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
*/
package org.apache.nutch.urlfilter.validator;

import org.apache.nutch.urlfilter.validator.UrlValidator;
import org.junit.Assert;
import org.junit.Test;

Expand Down
Loading

0 comments on commit 4a61208

Please sign in to comment.