Skip to content

Commit 28ea9c4

Browse files
committed
HTTP: Populate WARC-IP-Address with server's IP address
Fixes #396
1 parent: 5a6531b; commit: 28ea9c4

File tree

3 files changed

+12
-10
lines changed

3 files changed

+12
-10
lines changed

modules/src/main/java/org/archive/modules/fetcher/FetchHTTPRequest.java

+5 -2
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ public ManagedHttpClientConnection create(HttpRoute route,
600600
DEFAULT_BUFSIZE, chardecoder, charencoder,
601601
cconfig.getMessageConstraints(), null, null,
602602
DefaultHttpRequestWriterFactory.INSTANCE,
603-
DefaultHttpResponseParserFactory.INSTANCE);
603+
DefaultHttpResponseParserFactory.INSTANCE, curi);
604604
}
605605
};
606606
BasicHttpClientConnectionManager connMan = new BasicHttpClientConnectionManager(
@@ -618,6 +618,7 @@ protected static class RecordingHttpClientConnection extends DefaultBHttpClientC
618618

619619
private static final AtomicLong COUNTER = new AtomicLong();
620620
private String id;
621+
private final CrawlURI curi;
621622

622623
public RecordingHttpClientConnection(
623624
final int buffersize,
@@ -628,15 +629,17 @@ public RecordingHttpClientConnection(
628629
final ContentLengthStrategy incomingContentStrategy,
629630
final ContentLengthStrategy outgoingContentStrategy,
630631
final HttpMessageWriterFactory<HttpRequest> requestWriterFactory,
631-
final HttpMessageParserFactory<HttpResponse> responseParserFactory) {
632+
final HttpMessageParserFactory<HttpResponse> responseParserFactory, CrawlURI curi) {
632633
super(buffersize, fragmentSizeHint, chardecoder, charencoder,
633634
constraints, incomingContentStrategy, outgoingContentStrategy,
634635
requestWriterFactory, responseParserFactory);
635636
id = "recording-http-connection-" + Long.toString(COUNTER.getAndIncrement());
637+
this.curi = curi;
636638
}
637639

638640
@Override
639641
protected InputStream getSocketInputStream(final Socket socket) throws IOException {
642+
curi.setServerIP(socket.getInetAddress().getHostAddress());
640643
Recorder recorder = Recorder.getHttpRecorder();
641644
if (recorder != null) { // XXX || (isSecure() && isProxied())) {
642645
return recorder.inputWrap(super.getSocketInputStream(socket));

modules/src/main/java/org/archive/modules/warc/HttpResponseRecordBuilder.java

+5 -7
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,6 @@
11
package org.archive.modules.warc;
22

3-
import static org.archive.format.warc.WARCConstants.HEADER_KEY_CONCURRENT_TO;
4-
import static org.archive.format.warc.WARCConstants.HEADER_KEY_PAYLOAD_DIGEST;
5-
import static org.archive.format.warc.WARCConstants.HEADER_KEY_TRUNCATED;
6-
import static org.archive.format.warc.WARCConstants.HTTP_RESPONSE_MIMETYPE;
7-
import static org.archive.format.warc.WARCConstants.NAMED_FIELD_TRUNCATED_VALUE_HEAD;
8-
import static org.archive.format.warc.WARCConstants.NAMED_FIELD_TRUNCATED_VALUE_LENGTH;
9-
import static org.archive.format.warc.WARCConstants.NAMED_FIELD_TRUNCATED_VALUE_TIME;
3+
import static org.archive.format.warc.WARCConstants.*;
104
import static org.archive.modules.CoreAttributeConstants.A_WARC_RESPONSE_HEADERS;
115
import static org.archive.modules.CoreAttributeConstants.HEADER_TRUNC;
126
import static org.archive.modules.CoreAttributeConstants.LENGTH_TRUNC;
@@ -55,6 +49,10 @@ public WARCRecordInfo buildRecord(CrawlURI curi, URI concurrentTo) throws IOExce
5549
curi.getContentDigestSchemeString());
5650
}
5751

52+
if (curi.getServerIP() != null) {
53+
recordInfo.addExtraHeader(HEADER_KEY_IP, curi.getServerIP());
54+
}
55+
5856
// Check for truncated annotation
5957
String value = null;
6058
Collection<String> anno = curi.getAnnotations();

modules/src/test/java/org/archive/modules/fetcher/FetchHTTPTests.java

+2 -1
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,8 @@ protected void runDefaultChecks(CrawlURI curi, String... exclusionsArray)
130130
assertEquals(DEFAULT_PAYLOAD_STRING.length(), curi.getContentLength());
131131
assertEquals(curi.getContentSize(), curi.getRecordedSize());
132132

133-
// check various
133+
// check various
134+
assertNotNull(curi.getServerIP());
134135
assertEquals("sha1:TQ5R6YVOZLTQENRIIENVGXHOPX3YCRNJ", curi.getContentDigestSchemeString());
135136
if (!exclusions.contains("contentType")) {
136137
assertEquals("text/plain;charset=US-ASCII", curi.getContentType());

0 commit comments

Comments
 (0)