Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #8926 - HttpClient GZIPContentDecoder should remove Content-Len… #10326

Merged
merged 6 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@

import java.net.URI;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jetty.client.api.ContentResponse;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpHeaderValue;
import org.eclipse.jetty.http.HttpMethod;
import org.eclipse.jetty.http.HttpStatus;
import org.eclipse.jetty.server.Server;
Expand Down Expand Up @@ -53,18 +53,23 @@ public void testGetParams() throws Exception
{
URI uri = server.getURI().resolve("/params?a=b&foo=bar");

AtomicReference<String> contentEncoding = new AtomicReference<>();
ContentResponse response = client.newRequest(uri)
.method(HttpMethod.GET)
.headers(headers -> headers.put(HttpHeader.ACCEPT_ENCODING, HttpHeaderValue.GZIP))
.onResponseHeader((r, field) ->
{
if (field.getHeader() == HttpHeader.CONTENT_ENCODING)
contentEncoding.set(field.getValue());
return true;
})
.send();
assertThat("HTTP Response Status", response.getStatus(), is(HttpStatus.OK_200));

// dumpResponseHeaders(response);

// test gzip
// Test that Gzip was used to produce the response
String contentEncoding = response.getHeaders().get(HttpHeader.CONTENT_ENCODING);
assertThat("Content-Encoding", contentEncoding, containsString("gzip"));
assertThat("Content-Encoding", contentEncoding.get(), containsString("gzip"));

// test response content
String responseBody = response.getContentAsString();
Expand All @@ -78,18 +83,25 @@ public void testGetParams() throws Exception
public void testGetHello() throws Exception
{
URI uri = server.getURI().resolve("/hello");

AtomicReference<String> contentEncoding = new AtomicReference<>();
ContentResponse response = client.newRequest(uri)
.method(HttpMethod.GET)
.headers(headers -> headers.put(HttpHeader.ACCEPT_ENCODING, HttpHeaderValue.GZIP))
.onResponseHeader((r, field) ->
{
if (field.getHeader() == HttpHeader.CONTENT_ENCODING)
contentEncoding.set(field.getValue());
return true;
})
.send();

assertThat("HTTP Response Status", response.getStatus(), is(HttpStatus.OK_200));

// dumpResponseHeaders(response);

// test gzip
// Test that Gzip was used to produce the response
String contentEncoding = response.getHeaders().get(HttpHeader.CONTENT_ENCODING);
assertThat("Content-Encoding", contentEncoding, containsString("gzip"));
assertThat("Content-Encoding", contentEncoding.get(), containsString("gzip"));

// test expected header from wrapper
String welcome = response.getHeaders().get("X-Welcome");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
*/
public interface ContentDecoder
{
/**
* <p>Processes the exchange just before the decoding of the response content.</p>
* <p>Typical processing may involve modifying the response headers, for example
* by temporarily removing the {@code Content-Length} header, or modifying the
* {@code Content-Encoding} header.</p>
*
* @param exchange the exchange to process before decoding the response content
*/
public default void beforeDecoding(HttpExchange exchange)
lorban marked this conversation as resolved.
Show resolved Hide resolved
{
}

/**
* <p>Decodes the bytes in the given {@code buffer} and returns decoded bytes, if any.</p>
*
Expand All @@ -39,6 +51,18 @@ public default void release(ByteBuffer decoded)
{
}

/**
* <p>Processes the exchange after the response content has been decoded.</p>
* <p>Typical processing may involve modifying the response headers, for example
* updating the {@code Content-Length} header to the length of the decoded
* response content.
*
* @param exchange the exchange to process after decoding the response content
*/
public default void afterDecoding(HttpExchange exchange)
lorban marked this conversation as resolved.
Show resolved Hide resolved
{
}

/**
* Factory for {@link ContentDecoder}s; subclasses must implement {@link #newContentDecoder()}.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
package org.eclipse.jetty.client;

import java.nio.ByteBuffer;
import java.util.ListIterator;

import org.eclipse.jetty.http.HttpField;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.io.ByteBufferPool;

/**
Expand All @@ -24,6 +27,8 @@ public class GZIPContentDecoder extends org.eclipse.jetty.http.GZIPContentDecode
{
public static final int DEFAULT_BUFFER_SIZE = 8192;

private long decodedLength;

public GZIPContentDecoder()
{
this(DEFAULT_BUFFER_SIZE);
Expand All @@ -39,13 +44,54 @@ public GZIPContentDecoder(ByteBufferPool byteBufferPool, int bufferSize)
super(byteBufferPool, bufferSize);
}

@Override
public void beforeDecoding(HttpExchange exchange)
{
exchange.getResponse().headers(headers ->
{
ListIterator<HttpField> iterator = headers.listIterator();
while (iterator.hasNext())
{
HttpField field = iterator.next();
HttpHeader header = field.getHeader();
if (header == HttpHeader.CONTENT_LENGTH)
{
// Content-Length is not valid anymore while we are decoding.
iterator.remove();
}
else if (header == HttpHeader.CONTENT_ENCODING)
{
// Content-Encoding should be removed/modified as the content will be decoded.
String value = field.getValue();
int comma = value.lastIndexOf(",");
if (comma < 0)
iterator.remove();
else
iterator.set(new HttpField(HttpHeader.CONTENT_ENCODING, value.substring(0, comma)));
break;
}
}
});
}

@Override
protected boolean decodedChunk(ByteBuffer chunk)
{
decodedLength += chunk.remaining();
super.decodedChunk(chunk);
return true;
}

@Override
public void afterDecoding(HttpExchange exchange)
{
exchange.getResponse().headers(headers ->
{
headers.remove(HttpHeader.TRANSFER_ENCODING);
headers.putLongField(HttpHeader.CONTENT_LENGTH, decodedLength);
});
}

/**
* Specialized {@link ContentDecoder.Factory} for the "gzip" encoding.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
import org.eclipse.jetty.client.api.Response;
import org.eclipse.jetty.client.api.Result;
import org.eclipse.jetty.http.HttpField;
import org.eclipse.jetty.http.HttpFields;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpMethod;
import org.eclipse.jetty.http.HttpStatus;
import org.eclipse.jetty.http.QuotedCSV;
import org.eclipse.jetty.util.BufferUtil;
import org.eclipse.jetty.util.Callback;
import org.eclipse.jetty.util.MathUtils;
Expand Down Expand Up @@ -286,33 +289,45 @@ protected boolean responseHeaders(HttpExchange exchange)
return false;

HttpResponse response = exchange.getResponse();
HttpFields responseHeaders = response.getHeaders();
if (LOG.isDebugEnabled())
LOG.debug("Response headers {}{}{}", response, System.lineSeparator(), response.getHeaders().toString().trim());
ResponseNotifier notifier = getHttpDestination().getResponseNotifier();
List<Response.ResponseListener> responseListeners = exchange.getConversation().getResponseListeners();
notifier.notifyHeaders(responseListeners, response);
contentListeners.reset(responseListeners);
contentListeners.notifyBeforeContent(response);
LOG.debug("Response headers {}{}{}", response, System.lineSeparator(), responseHeaders.toString().trim());

if (!contentListeners.isEmpty())
// HEAD responses may have Content-Encoding
// and Content-Length, but have no content.
if (!HttpMethod.HEAD.is(exchange.getRequest().getMethod()))
{
List<String> contentEncodings = response.getHeaders().getCSV(HttpHeader.CONTENT_ENCODING.asString(), false);
if (contentEncodings != null && !contentEncodings.isEmpty())
// Content-Encoding may have multiple values in the order they
// are applied, but we only support one decoding pass, the last one.
String contentEncoding = responseHeaders.get(HttpHeader.CONTENT_ENCODING);
if (contentEncoding != null)
{
int comma = contentEncoding.indexOf(",");
if (comma > 0)
{
QuotedCSV parser = new QuotedCSV(false);
parser.addValue(contentEncoding);
List<String> values = parser.getValues();
contentEncoding = values.get(values.size() - 1);
}
Comment on lines +302 to +312
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a header can have multiple CONTENT_ENCODING fields which mean the same as a single comma separated field. But it is 1am and it could be space separate fields I'm thinking of?
So probably best to use HttpFields.getCSV

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we wanted to be really nasty, we should strip "identity" encodings off the end.... but the server doesn't do that (anymore)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the times Content-Encoding is single-valued.

As you seem concerned about hot paths, I chose not to use getCSV() because in the single-value case allocates a List and a QuotedCSV for nothing, while the code above only does so for multi-values.

Doing a first get() and then getCSV() iterates over the headers twice, although should be a small list.

What do you prefer?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a vexed question and now I'm concerned the server doesn't well handle the case of multiple HttpFields.

If we are struggling to come up with something that is both simple, clear and efficient, then probably the HttpFields API is not good enough.
Perhaps we need a method to ask HttpFields if it has a value as the last entry in a list of values, with the option of removing that entry. This could be implemented efficiently without having to tokenize the list twice. I'd be happy with an implementation that worked well for single valued lists, but was more expensive for multi valued lists.

Something like:

    String contentEncoding = responseHeaders.getLastCSV(HttpHeader.CONTENT_ENCODING);

and on mutable:

    String contentEncoding = responseHeaders.takeLastCSV(HttpHeader.CONTENT_ENCODING);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sbordet I've created #10340 that might be useful hear as an efficient way to find the last value. I've not done anything about removing the last value until I get feedback.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sbordet I've further update #10340 with a listIterator that make operating on the last item simpler. So now in the server GzipRequest, I just copy the HttpFields to a mutable, then iterate in reverse over it correcting the headers found.
This will have merge hell with #10339 and #10308

// If there is a matching content decoder factory, build a decoder.
for (ContentDecoder.Factory factory : getHttpDestination().getHttpClient().getContentDecoderFactories())
{
for (String encoding : contentEncodings)
if (factory.getEncoding().equalsIgnoreCase(contentEncoding))
{
if (factory.getEncoding().equalsIgnoreCase(encoding))
{
decoder = new Decoder(exchange, factory.newContentDecoder());
break;
}
decoder = new Decoder(exchange, factory.newContentDecoder());
break;
}
}
}
}

ResponseNotifier notifier = getHttpDestination().getResponseNotifier();
List<Response.ResponseListener> responseListeners = exchange.getConversation().getResponseListeners();
notifier.notifyHeaders(responseListeners, response);
contentListeners.reset(responseListeners);
contentListeners.notifyBeforeContent(response);

if (updateResponseState(ResponseState.TRANSIENT, ResponseState.HEADERS))
{
boolean hasDemand = hasDemandOrStall();
Expand Down Expand Up @@ -755,6 +770,7 @@ private Decoder(HttpExchange exchange, ContentDecoder decoder)
{
this.exchange = exchange;
this.decoder = Objects.requireNonNull(decoder);
decoder.beforeDecoding(exchange);
lorban marked this conversation as resolved.
Show resolved Hide resolved
}

private boolean decode(ByteBuffer encoded, Callback callback)
Expand Down Expand Up @@ -868,6 +884,7 @@ private void resume()
@Override
public void destroy()
{
decoder.afterDecoding(exchange);
lorban marked this conversation as resolved.
Show resolved Hide resolved
if (decoder instanceof Destroyable)
((Destroyable)decoder).destroy();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,12 @@ default Request port(int port)
Request onResponseHeader(Response.HeaderListener listener);

/**
* <p>Registers a listener for the headers event.</p>
* <p>Note that the response headers at this event
* may be different from the headers received in
* {@link #onResponseHeader(Response.HeaderListener)}
* as specified in {@link Response#getHeaders()}.</p>
*
* @param listener a listener for response headers event
* @return this request object
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ public interface Response
String getReason();

/**
* <p>Returns the headers of this response.</p>
* <p>Some headers sent by the server may not be present,
* or be present but modified, while the content is being
* processed.
* A typical example is the {@code Content-Length} header
* when the content is sent compressed by the server and
* automatically decompressed by the client: the
* {@code Content-Length} header will be removed.</p>
* <p>Similarly, the {@code Content-Encoding} header
* may be removed or modified, as the content is
* decoded by the client.</p>
*
* @return the headers of this response
*/
HttpFields getHeaders();
Expand Down
Loading