Skip to content

Commit

Permalink
Use lowercase for charsets #11741 (#12347)
Browse files Browse the repository at this point in the history
Fix #11741 as per the WhatTFWG recommendations, use lower case for charset names.
Took the opportunity for some minor optimizations:
 + use the already made HttpField instance in MimeTypes.Type rather than create a new one in the HttpParser.CACHE
 + keep the MimeType.Type associated with the pre encoded Content-Type fields
  • Loading branch information
gregw authored Oct 16, 2024
1 parent 1f1040c commit b947b48
Show file tree
Hide file tree
Showing 29 changed files with 265 additions and 103 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.time.Duration;
import java.util.List;
import java.util.Locale;
Expand All @@ -24,10 +25,12 @@
import java.util.function.Supplier;

import org.eclipse.jetty.http.HttpCookie;
import org.eclipse.jetty.http.HttpField;
import org.eclipse.jetty.http.HttpFields;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpStatus;
import org.eclipse.jetty.http.HttpURI;
import org.eclipse.jetty.http.MimeTypes;
import org.eclipse.jetty.http.Trailers;
import org.eclipse.jetty.io.Content;
import org.eclipse.jetty.server.Context;
Expand Down Expand Up @@ -118,7 +121,16 @@ public boolean handle(Request request, Response response, Callback callback) thr
// Gets the request Content-Type.
// Replaces:
// - servletRequest.getContentType()
String contentType = request.getHeaders().get(HttpHeader.CONTENT_TYPE);
HttpField contentTypeField = request.getHeaders().getField(HttpHeader.CONTENT_TYPE);
String contentType = contentTypeField.getValue();
MimeTypes.Type knownType = MimeTypes.getMimeTypeFromContentType(contentTypeField);

// Gets the request Character Encoding.
// Replaces:
// - servletRequest.getCharacterEncoding()
Charset charset = knownType == null
? MimeTypes.getCharsetFromContentType(contentTypeField)
: knownType.getCharset();

// Gets the request Content-Length.
// Replaces:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.Response;
import org.eclipse.jetty.util.Callback;
import org.hamcrest.Matchers;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand Down Expand Up @@ -114,7 +116,7 @@ public boolean handle(Request request, Response response, Callback callback) thr
assertEquals(200, response.getStatus());
assertEquals(content, response.getContentAsString());
assertEquals(mediaType, response.getMediaType());
assertEquals(encoding, response.getEncoding());
assertThat(response.getEncoding(), Matchers.equalToIgnoringCase(encoding));
}

@ParameterizedTest
Expand Down Expand Up @@ -144,6 +146,6 @@ public boolean handle(Request request, Response response, Callback callback) thr
assertEquals(200, response.getStatus());
assertEquals(content, response.getContentAsString());
assertEquals(mediaType, response.getMediaType());
assertEquals(encoding, response.getEncoding());
assertThat(response.getEncoding(), Matchers.equalToIgnoringCase(encoding));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.Response;
import org.eclipse.jetty.util.Fields;
import org.hamcrest.Matchers;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;

Expand Down Expand Up @@ -96,7 +97,7 @@ public void testFormContentProviderWithDifferentContentType(Scenario scenario) t
protected void service(Request request, Response response) throws Throwable
{
assertEquals("POST", request.getMethod());
assertEquals(contentType, request.getHeaders().get(HttpHeader.CONTENT_TYPE));
assertThat(request.getHeaders().get(HttpHeader.CONTENT_TYPE), Matchers.equalToIgnoringCase(contentType));
assertEquals(content, Content.Source.asString(request));
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.eclipse.jetty.http.HttpTokens.EndOfContent;
Expand Down Expand Up @@ -138,25 +137,17 @@ public class HttpParser
.with(new HttpField(HttpHeader.EXPIRES, "Fri, 01 Jan 1990 00:00:00 GMT"))
.withAll(() ->
{
Map<String, HttpField> map = new LinkedHashMap<>();
// Add common Content types as fields
for (String type : new String[]{
"text/plain", "text/html", "text/xml", "text/json", "application/json", "application/x-www-form-urlencoded"
})
Map<String, HttpField> map = new LinkedHashMap<>();
for (MimeTypes.Type mimetype : MimeTypes.Type.values())
{
HttpField field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type);
map.put(field.toString(), field);

for (String charset : new String[]{"utf-8", "iso-8859-1"})
HttpField contentTypeField = mimetype.getContentTypeField();
map.put(contentTypeField.toString(), contentTypeField);
if (contentTypeField.getValue().contains(";charset="))
{
PreEncodedHttpField field1 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + ";charset=" + charset);
map.put(field1.toString(), field1);
PreEncodedHttpField field2 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + "; charset=" + charset);
map.put(field2.toString(), field2);
PreEncodedHttpField field3 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + ";charset=" + charset.toUpperCase(Locale.ENGLISH));
map.put(field3.toString(), field3);
PreEncodedHttpField field4 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + "; charset=" + charset.toUpperCase(Locale.ENGLISH));
map.put(field4.toString(), field4);
HttpField contentTypeFieldWithSpace =
new MimeTypes.ContentTypeField(MimeTypes.getMimeTypeFromContentType(contentTypeField), contentTypeField.getValue().replace(";charset=", "; charset="));
map.put(contentTypeFieldWithSpace.toString(), contentTypeFieldWithSpace);
}
}
return map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public HttpField getContentTypeField(Charset charset)
private final Charset _charset;
private final String _charsetString;
private final boolean _assumedCharset;
private final HttpField _field;
private final ContentTypeField _field;

Type(String name)
{
Expand All @@ -133,18 +133,18 @@ public HttpField getContentTypeField(Charset charset)
_charset = null;
_charsetString = null;
_assumedCharset = false;
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
_field = new ContentTypeField(this);
}

Type(String name, Type base)
{
_string = name;
_base = base;
_base = Objects.requireNonNull(base);
int i = name.indexOf(";charset=");
_charset = Charset.forName(name.substring(i + 9));
_charsetString = _charset.toString().toLowerCase(Locale.ENGLISH);
_assumedCharset = false;
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
_field = new ContentTypeField(this);
}

Type(String name, Charset cs)
Expand All @@ -154,9 +154,12 @@ public HttpField getContentTypeField(Charset charset)
_charset = cs;
_charsetString = _charset == null ? null : _charset.toString().toLowerCase(Locale.ENGLISH);
_assumedCharset = true;
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
_field = new ContentTypeField(this);
}

/**
* @return The {@link Charset} for this type or {@code null} if it is not known
*/
public Charset getCharset()
{
return _charset;
Expand All @@ -167,6 +170,11 @@ public String getCharsetString()
return _charsetString;
}

/**
* Check if this type is equal to the type passed as a string
* @param type The type to compare to
* @return {@code true} if this is the same type
*/
public boolean is(String type)
{
return _string.equalsIgnoreCase(type);
Expand All @@ -183,6 +191,9 @@ public String toString()
return _string;
}

/**
* @return {@code true} If the {@link Charset} for this type is assumed rather than being explicitly declared.
*/
public boolean isCharsetAssumed()
{
return _assumedCharset;
Expand All @@ -200,6 +211,10 @@ public HttpField getContentTypeField(Charset charset)
return new HttpField(HttpHeader.CONTENT_TYPE, getContentTypeWithoutCharset(_string) + ";charset=" + charset.name());
}

/**
* Get the base type of this type, which is the type without a charset specified
* @return The base type or this type if it is a base type
*/
public Type getBaseType()
{
return _base;
Expand Down Expand Up @@ -227,23 +242,34 @@ public Type getBaseType()
})
.build();

/**
* Get the base value, stripped of any parameters
* @param value The value
* @return A string with any semicolon separated parameters removed
*/
public static String getBase(String value)
{
int index = value.indexOf(';');
return index == -1 ? value : value.substring(0, index);
}

/**
* Get the base type of this type, which is the type without a charset specified
* @param contentType The mimetype as a string
* @return The base type or this type if it is a base type
*/
public static Type getBaseType(String contentType)
{
if (StringUtil.isEmpty(contentType))
return null;
Type type = CACHE.getBest(contentType);
if (type == null)
return null;
if (type.asString().length() == contentType.length())
return type.getBaseType();
if (contentType.charAt(type.asString().length()) == ';')
return type.getBaseType();
contentType = contentType.replace(" ", "");
if (type.asString().length() == contentType.length())
return type.getBaseType();
if (contentType.charAt(type.asString().length()) == ';')
return type.getBaseType();
return null;
{
type = CACHE.get(getBase(contentType));
if (type == null)
return null;
}
return type.getBaseType();
}

public static boolean isKnownLocale(Locale locale)
Expand Down Expand Up @@ -326,6 +352,23 @@ public MimeTypes(MimeTypes defaults)
}
}

/**
* Get the explicit, assumed, or inferred Charset for a HttpField containing a mime type value
* @param field HttpField with a mime type value (e.g. Content-Type)
* @return A {@link Charset} or null;
* @throws IllegalCharsetNameException
* If the given charset name is illegal
* @throws UnsupportedCharsetException
* If no support for the named charset is available
* in this instance of the Java virtual machine
*/
public Charset getCharset(HttpField field) throws IllegalCharsetNameException, UnsupportedCharsetException
{
if (field instanceof ContentTypeField contentTypeField)
return contentTypeField.getMimeType().getCharset();
return getCharset(field.getValue());
}

/**
* Get the explicit, assumed, or inferred Charset for a mime type
* @param mimeType String form or a mimeType
Expand Down Expand Up @@ -638,6 +681,46 @@ private static String normalizeMimeType(String type)
return StringUtil.asciiToLowerCase(type);
}

public static MimeTypes.Type getMimeTypeFromContentType(HttpField field)
{
if (field == null)
return null;

assert field.getHeader() == HttpHeader.CONTENT_TYPE;

if (field instanceof MimeTypes.ContentTypeField contentTypeField)
return contentTypeField.getMimeType();

return MimeTypes.CACHE.get(field.getValue());
}

/**
* Efficiently extract the charset value from a {@code Content-Type} {@link HttpField}.
* @param field A {@code Content-Type} field.
* @return The {@link Charset}
*/
public static Charset getCharsetFromContentType(HttpField field)
{
if (field == null)
return null;

assert field.getHeader() == HttpHeader.CONTENT_TYPE;

if (field instanceof ContentTypeField contentTypeField)
return contentTypeField._type.getCharset();

String charset = getCharsetFromContentType(field.getValue());
if (charset == null)
return null;

return Charset.forName(charset);
}

/**
* Efficiently extract the charset value from a {@code Content-Type} string
* @param value A content-type value (e.g. {@code text/plain; charset=utf8}).
* @return The charset value (e.g. {@code utf-8}).
*/
public static String getCharsetFromContentType(String value)
{
if (value == null)
Expand Down Expand Up @@ -751,6 +834,11 @@ else if (' ' != b)
return null;
}

/**
* Efficiently extract the base mime-type from a content-type value
* @param value A content-type value (e.g. {@code text/plain; charset=utf8}).
* @return The base mime-type value (e.g. {@code text/plain}).
*/
public static String getContentTypeWithoutCharset(String value)
{
int end = value.length();
Expand Down Expand Up @@ -876,4 +964,29 @@ else if (' ' != b)
return value;
return builder.toString();
}

/**
* A {@link PreEncodedHttpField} for `Content-Type` that can hold a {@link MimeTypes.Type} field
* for later recovery.
*/
static class ContentTypeField extends PreEncodedHttpField
{
private final Type _type;

public ContentTypeField(MimeTypes.Type type)
{
this(type, type.toString());
}

public ContentTypeField(MimeTypes.Type type, String value)
{
super(HttpHeader.CONTENT_TYPE, value);
_type = type;
}

public Type getMimeType()
{
return _type;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2601,7 +2601,25 @@ public void testRequestMaxHeaderBytesCumulative(String eoln)
@ParameterizedTest
@ValueSource(strings = {"\r\n", "\n"})
@SuppressWarnings("ReferenceEquality")
public void testCachedField(String eoln)
public void testInsensitiveCachedField(String eoln)
{
ByteBuffer buffer = BufferUtil.toBuffer(
"GET / HTTP/1.1" + eoln +
"Content-Type: text/plain;Charset=UTF-8" + eoln +
eoln);

HttpParser.RequestHandler handler = new Handler();
HttpParser parser = new HttpParser(handler);
parseAll(parser, buffer);

HttpField field = _fields.get(0);
assertThat(field.getValue(), is("text/plain;charset=utf-8"));
}

@ParameterizedTest
@ValueSource(strings = {"\r\n", "\n"})
@SuppressWarnings("ReferenceEquality")
public void testDynamicCachedField(String eoln)
{
ByteBuffer buffer = BufferUtil.toBuffer(
"GET / HTTP/1.1" + eoln +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public FormAuthenticator(String login, String error, boolean dispatch)
* If true, uris that cause a redirect to a login page will always
* be remembered. If false, only the first uri that leads to a login
* page redirect is remembered.
* See https://bugs.eclipse.org/bugs/show_bug.cgi?id=379909
* See <a href="https://bugs.eclipse.org/bugs/show_bug.cgi?id=379909">bug 379909</a>
*
* @param alwaysSave true to always save the uri
*/
Expand Down
Loading

0 comments on commit b947b48

Please sign in to comment.