Skip to content

Commit b947b48

Browse files
authored
Use lowercase for charsets #11741 (#12347)
Fix #11741: as per the WHATWG recommendations, use lower case for charset names. Took the opportunity for some minor optimizations: (1) use the already-made HttpField instance in MimeTypes.Type rather than creating a new one in the HttpParser.CACHE; (2) keep the MimeTypes.Type associated with the pre-encoded Content-Type fields.
1 parent 1f1040c commit b947b48

File tree

29 files changed

+265
-103
lines changed

29 files changed

+265
-103
lines changed

documentation/jetty/modules/code/examples/src/main/java/org/eclipse/jetty/docs/programming/migration/ServletToHandlerDocs.java

+13-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import java.io.InputStream;
1717
import java.nio.ByteBuffer;
18+
import java.nio.charset.Charset;
1819
import java.time.Duration;
1920
import java.util.List;
2021
import java.util.Locale;
@@ -24,10 +25,12 @@
2425
import java.util.function.Supplier;
2526

2627
import org.eclipse.jetty.http.HttpCookie;
28+
import org.eclipse.jetty.http.HttpField;
2729
import org.eclipse.jetty.http.HttpFields;
2830
import org.eclipse.jetty.http.HttpHeader;
2931
import org.eclipse.jetty.http.HttpStatus;
3032
import org.eclipse.jetty.http.HttpURI;
33+
import org.eclipse.jetty.http.MimeTypes;
3134
import org.eclipse.jetty.http.Trailers;
3235
import org.eclipse.jetty.io.Content;
3336
import org.eclipse.jetty.server.Context;
@@ -118,7 +121,16 @@ public boolean handle(Request request, Response response, Callback callback) thr
118121
// Gets the request Content-Type.
119122
// Replaces:
120123
// - servletRequest.getContentType()
121-
String contentType = request.getHeaders().get(HttpHeader.CONTENT_TYPE);
124+
HttpField contentTypeField = request.getHeaders().getField(HttpHeader.CONTENT_TYPE);
125+
String contentType = contentTypeField.getValue();
126+
MimeTypes.Type knownType = MimeTypes.getMimeTypeFromContentType(contentTypeField);
127+
128+
// Gets the request Character Encoding.
129+
// Replaces:
130+
// - servletRequest.getCharacterEncoding()
131+
Charset charset = knownType == null
132+
? MimeTypes.getCharsetFromContentType(contentTypeField)
133+
: knownType.getCharset();
122134

123135
// Gets the request Content-Length.
124136
// Replaces:

jetty-core/jetty-client/src/test/java/org/eclipse/jetty/client/ContentResponseTest.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
import org.eclipse.jetty.server.Request;
2424
import org.eclipse.jetty.server.Response;
2525
import org.eclipse.jetty.util.Callback;
26+
import org.hamcrest.Matchers;
2627
import org.junit.jupiter.params.ParameterizedTest;
2728
import org.junit.jupiter.params.provider.ArgumentsSource;
2829

30+
import static org.hamcrest.MatcherAssert.assertThat;
2931
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
3032
import static org.junit.jupiter.api.Assertions.assertEquals;
3133
import static org.junit.jupiter.api.Assertions.assertNull;
@@ -114,7 +116,7 @@ public boolean handle(Request request, Response response, Callback callback) thr
114116
assertEquals(200, response.getStatus());
115117
assertEquals(content, response.getContentAsString());
116118
assertEquals(mediaType, response.getMediaType());
117-
assertEquals(encoding, response.getEncoding());
119+
assertThat(response.getEncoding(), Matchers.equalToIgnoringCase(encoding));
118120
}
119121

120122
@ParameterizedTest
@@ -144,6 +146,6 @@ public boolean handle(Request request, Response response, Callback callback) thr
144146
assertEquals(200, response.getStatus());
145147
assertEquals(content, response.getContentAsString());
146148
assertEquals(mediaType, response.getMediaType());
147-
assertEquals(encoding, response.getEncoding());
149+
assertThat(response.getEncoding(), Matchers.equalToIgnoringCase(encoding));
148150
}
149151
}

jetty-core/jetty-client/src/test/java/org/eclipse/jetty/client/util/TypedContentProviderTest.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.eclipse.jetty.server.Request;
3030
import org.eclipse.jetty.server.Response;
3131
import org.eclipse.jetty.util.Fields;
32+
import org.hamcrest.Matchers;
3233
import org.junit.jupiter.params.ParameterizedTest;
3334
import org.junit.jupiter.params.provider.ArgumentsSource;
3435

@@ -96,7 +97,7 @@ public void testFormContentProviderWithDifferentContentType(Scenario scenario) t
9697
protected void service(Request request, Response response) throws Throwable
9798
{
9899
assertEquals("POST", request.getMethod());
99-
assertEquals(contentType, request.getHeaders().get(HttpHeader.CONTENT_TYPE));
100+
assertThat(request.getHeaders().get(HttpHeader.CONTENT_TYPE), Matchers.equalToIgnoringCase(contentType));
100101
assertEquals(content, Content.Source.asString(request));
101102
}
102103
});

jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/HttpParser.java

+8-17
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import java.util.EnumSet;
2020
import java.util.LinkedHashMap;
2121
import java.util.List;
22-
import java.util.Locale;
2322
import java.util.Map;
2423

2524
import org.eclipse.jetty.http.HttpTokens.EndOfContent;
@@ -138,25 +137,17 @@ public class HttpParser
138137
.with(new HttpField(HttpHeader.EXPIRES, "Fri, 01 Jan 1990 00:00:00 GMT"))
139138
.withAll(() ->
140139
{
141-
Map<String, HttpField> map = new LinkedHashMap<>();
142140
// Add common Content types as fields
143-
for (String type : new String[]{
144-
"text/plain", "text/html", "text/xml", "text/json", "application/json", "application/x-www-form-urlencoded"
145-
})
141+
Map<String, HttpField> map = new LinkedHashMap<>();
142+
for (MimeTypes.Type mimetype : MimeTypes.Type.values())
146143
{
147-
HttpField field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type);
148-
map.put(field.toString(), field);
149-
150-
for (String charset : new String[]{"utf-8", "iso-8859-1"})
144+
HttpField contentTypeField = mimetype.getContentTypeField();
145+
map.put(contentTypeField.toString(), contentTypeField);
146+
if (contentTypeField.getValue().contains(";charset="))
151147
{
152-
PreEncodedHttpField field1 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + ";charset=" + charset);
153-
map.put(field1.toString(), field1);
154-
PreEncodedHttpField field2 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + "; charset=" + charset);
155-
map.put(field2.toString(), field2);
156-
PreEncodedHttpField field3 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + ";charset=" + charset.toUpperCase(Locale.ENGLISH));
157-
map.put(field3.toString(), field3);
158-
PreEncodedHttpField field4 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + "; charset=" + charset.toUpperCase(Locale.ENGLISH));
159-
map.put(field4.toString(), field4);
148+
HttpField contentTypeFieldWithSpace =
149+
new MimeTypes.ContentTypeField(MimeTypes.getMimeTypeFromContentType(contentTypeField), contentTypeField.getValue().replace(";charset=", "; charset="));
150+
map.put(contentTypeFieldWithSpace.toString(), contentTypeFieldWithSpace);
160151
}
161152
}
162153
return map;

jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/MimeTypes.java

+129-16
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public HttpField getContentTypeField(Charset charset)
124124
private final Charset _charset;
125125
private final String _charsetString;
126126
private final boolean _assumedCharset;
127-
private final HttpField _field;
127+
private final ContentTypeField _field;
128128

129129
Type(String name)
130130
{
@@ -133,18 +133,18 @@ public HttpField getContentTypeField(Charset charset)
133133
_charset = null;
134134
_charsetString = null;
135135
_assumedCharset = false;
136-
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
136+
_field = new ContentTypeField(this);
137137
}
138138

139139
Type(String name, Type base)
140140
{
141141
_string = name;
142-
_base = base;
142+
_base = Objects.requireNonNull(base);
143143
int i = name.indexOf(";charset=");
144144
_charset = Charset.forName(name.substring(i + 9));
145145
_charsetString = _charset.toString().toLowerCase(Locale.ENGLISH);
146146
_assumedCharset = false;
147-
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
147+
_field = new ContentTypeField(this);
148148
}
149149

150150
Type(String name, Charset cs)
@@ -154,9 +154,12 @@ public HttpField getContentTypeField(Charset charset)
154154
_charset = cs;
155155
_charsetString = _charset == null ? null : _charset.toString().toLowerCase(Locale.ENGLISH);
156156
_assumedCharset = true;
157-
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
157+
_field = new ContentTypeField(this);
158158
}
159159

160+
/**
161+
* @return The {@link Charset} for this type or {@code null} if it is not known
162+
*/
160163
public Charset getCharset()
161164
{
162165
return _charset;
@@ -167,6 +170,11 @@ public String getCharsetString()
167170
return _charsetString;
168171
}
169172

173+
/**
174+
* Check if this type is equal, ignoring case, to the type passed as a string
175+
* @param type The type to compare to
176+
* @return {@code true} if this is the same type
177+
*/
170178
public boolean is(String type)
171179
{
172180
return _string.equalsIgnoreCase(type);
@@ -183,6 +191,9 @@ public String toString()
183191
return _string;
184192
}
185193

194+
/**
195+
* @return {@code true} if the {@link Charset} for this type is assumed rather than being explicitly declared.
196+
*/
186197
public boolean isCharsetAssumed()
187198
{
188199
return _assumedCharset;
@@ -200,6 +211,10 @@ public HttpField getContentTypeField(Charset charset)
200211
return new HttpField(HttpHeader.CONTENT_TYPE, getContentTypeWithoutCharset(_string) + ";charset=" + charset.name());
201212
}
202213

214+
/**
215+
* Get the base type of this type, which is the type without a charset specified
216+
* @return The base type or this type if it is a base type
217+
*/
203218
public Type getBaseType()
204219
{
205220
return _base;
@@ -227,23 +242,34 @@ public Type getBaseType()
227242
})
228243
.build();
229244

245+
/**
246+
* Get the base value, stripped of any parameters
247+
* @param value The value
248+
* @return A string with any semicolon separated parameters removed
249+
*/
250+
public static String getBase(String value)
251+
{
252+
int index = value.indexOf(';');
253+
return index == -1 ? value : value.substring(0, index);
254+
}
255+
256+
/**
257+
* Get the base type for a content type string, which is the matching known type without a charset specified
258+
* @param contentType The mimetype as a string
259+
* @return The base type of the matching known type, or {@code null} if the content type is empty or not known
260+
*/
230261
public static Type getBaseType(String contentType)
231262
{
232263
if (StringUtil.isEmpty(contentType))
233264
return null;
234265
Type type = CACHE.getBest(contentType);
235266
if (type == null)
236-
return null;
237-
if (type.asString().length() == contentType.length())
238-
return type.getBaseType();
239-
if (contentType.charAt(type.asString().length()) == ';')
240-
return type.getBaseType();
241-
contentType = contentType.replace(" ", "");
242-
if (type.asString().length() == contentType.length())
243-
return type.getBaseType();
244-
if (contentType.charAt(type.asString().length()) == ';')
245-
return type.getBaseType();
246-
return null;
267+
{
268+
type = CACHE.get(getBase(contentType));
269+
if (type == null)
270+
return null;
271+
}
272+
return type.getBaseType();
247273
}
248274

249275
public static boolean isKnownLocale(Locale locale)
@@ -326,6 +352,23 @@ public MimeTypes(MimeTypes defaults)
326352
}
327353
}
328354

355+
/**
356+
* Get the explicit, assumed, or inferred Charset for a HttpField containing a mime type value
357+
* @param field HttpField with a mime type value (e.g. Content-Type)
358+
* @return A {@link Charset} or {@code null}
359+
* @throws IllegalCharsetNameException
360+
* If the given charset name is illegal
361+
* @throws UnsupportedCharsetException
362+
* If no support for the named charset is available
363+
* in this instance of the Java virtual machine
364+
*/
365+
public Charset getCharset(HttpField field) throws IllegalCharsetNameException, UnsupportedCharsetException
366+
{
367+
if (field instanceof ContentTypeField contentTypeField)
368+
return contentTypeField.getMimeType().getCharset();
369+
return getCharset(field.getValue());
370+
}
371+
329372
/**
330373
* Get the explicit, assumed, or inferred Charset for a mime type
331374
* @param mimeType String form of a mimeType
@@ -638,6 +681,46 @@ private static String normalizeMimeType(String type)
638681
return StringUtil.asciiToLowerCase(type);
639682
}
640683

684+
public static MimeTypes.Type getMimeTypeFromContentType(HttpField field)
685+
{
686+
if (field == null)
687+
return null;
688+
689+
assert field.getHeader() == HttpHeader.CONTENT_TYPE;
690+
691+
if (field instanceof MimeTypes.ContentTypeField contentTypeField)
692+
return contentTypeField.getMimeType();
693+
694+
return MimeTypes.CACHE.get(field.getValue());
695+
}
696+
697+
/**
698+
* Efficiently extract the charset value from a {@code Content-Type} {@link HttpField}.
699+
* @param field A {@code Content-Type} field.
700+
* @return The {@link Charset}, or {@code null} if the field is {@code null} or no charset is found
701+
*/
702+
public static Charset getCharsetFromContentType(HttpField field)
703+
{
704+
if (field == null)
705+
return null;
706+
707+
assert field.getHeader() == HttpHeader.CONTENT_TYPE;
708+
709+
if (field instanceof ContentTypeField contentTypeField)
710+
return contentTypeField._type.getCharset();
711+
712+
String charset = getCharsetFromContentType(field.getValue());
713+
if (charset == null)
714+
return null;
715+
716+
return Charset.forName(charset);
717+
}
718+
719+
/**
720+
* Efficiently extract the charset value from a {@code Content-Type} string
721+
* @param value A content-type value (e.g. {@code text/plain; charset=utf8}).
722+
* @return The charset value (e.g. {@code utf-8}).
723+
*/
641724
public static String getCharsetFromContentType(String value)
642725
{
643726
if (value == null)
@@ -751,6 +834,11 @@ else if (' ' != b)
751834
return null;
752835
}
753836

837+
/**
838+
* Efficiently extract the base mime-type from a content-type value
839+
* @param value A content-type value (e.g. {@code text/plain; charset=utf8}).
840+
* @return The base mime-type value (e.g. {@code text/plain}).
841+
*/
754842
public static String getContentTypeWithoutCharset(String value)
755843
{
756844
int end = value.length();
@@ -876,4 +964,29 @@ else if (' ' != b)
876964
return value;
877965
return builder.toString();
878966
}
967+
968+
/**
969+
* A {@link PreEncodedHttpField} for {@code Content-Type} that can hold a {@link MimeTypes.Type} field
970+
* for later recovery.
971+
*/
972+
static class ContentTypeField extends PreEncodedHttpField
973+
{
974+
private final Type _type;
975+
976+
public ContentTypeField(MimeTypes.Type type)
977+
{
978+
this(type, type.toString());
979+
}
980+
981+
public ContentTypeField(MimeTypes.Type type, String value)
982+
{
983+
super(HttpHeader.CONTENT_TYPE, value);
984+
_type = type;
985+
}
986+
987+
public Type getMimeType()
988+
{
989+
return _type;
990+
}
991+
}
879992
}

jetty-core/jetty-http/src/test/java/org/eclipse/jetty/http/HttpParserTest.java

+19-1
Original file line numberDiff line numberDiff line change
@@ -2601,7 +2601,25 @@ public void testRequestMaxHeaderBytesCumulative(String eoln)
26012601
@ParameterizedTest
26022602
@ValueSource(strings = {"\r\n", "\n"})
26032603
@SuppressWarnings("ReferenceEquality")
2604-
public void testCachedField(String eoln)
2604+
public void testInsensitiveCachedField(String eoln)
2605+
{
2606+
ByteBuffer buffer = BufferUtil.toBuffer(
2607+
"GET / HTTP/1.1" + eoln +
2608+
"Content-Type: text/plain;Charset=UTF-8" + eoln +
2609+
eoln);
2610+
2611+
HttpParser.RequestHandler handler = new Handler();
2612+
HttpParser parser = new HttpParser(handler);
2613+
parseAll(parser, buffer);
2614+
2615+
HttpField field = _fields.get(0);
2616+
assertThat(field.getValue(), is("text/plain;charset=utf-8"));
2617+
}
2618+
2619+
@ParameterizedTest
2620+
@ValueSource(strings = {"\r\n", "\n"})
2621+
@SuppressWarnings("ReferenceEquality")
2622+
public void testDynamicCachedField(String eoln)
26052623
{
26062624
ByteBuffer buffer = BufferUtil.toBuffer(
26072625
"GET / HTTP/1.1" + eoln +

jetty-core/jetty-security/src/main/java/org/eclipse/jetty/security/authentication/FormAuthenticator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public FormAuthenticator(String login, String error, boolean dispatch)
8989
* If true, uris that cause a redirect to a login page will always
9090
* be remembered. If false, only the first uri that leads to a login
9191
* page redirect is remembered.
92-
* See https://bugs.eclipse.org/bugs/show_bug.cgi?id=379909
92+
* See <a href="https://bugs.eclipse.org/bugs/show_bug.cgi?id=379909">bug 379909</a>
9393
*
9494
* @param alwaysSave true to always save the uri
9595
*/

0 commit comments

Comments
 (0)