Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jetty 9.4.x #6473 normalize only once #6474

Merged
merged 1 commit into from
Jun 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.eclipse.jetty.http;

import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -214,4 +215,46 @@ public EnumSet<HttpComplianceSection> sections()
{
return _sections;
}

private static final EnumMap<HttpURI.Violation, HttpComplianceSection> __uriViolations = new EnumMap<>(HttpURI.Violation.class);
static
{
// create a map from Violation to compliance in a loop, so that any new violations added are detected with ISE
for (HttpURI.Violation violation : HttpURI.Violation.values())
{
switch (violation)
{
case SEPARATOR:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_SEPARATORS);
break;
case SEGMENT:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_SEGMENTS);
break;
case PARAM:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_PARAMETERS);
break;
case ENCODING:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_ENCODING);
break;
case EMPTY:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_EMPTY_SEGMENT);
break;
case UTF16:
__uriViolations.put(violation, HttpComplianceSection.NO_UTF16_ENCODINGS);
break;
default:
throw new IllegalStateException();
}
}
}

public static String checkUriCompliance(HttpCompliance compliance, HttpURI uri)
{
for (HttpURI.Violation violation : HttpURI.Violation.values())
{
if (uri.hasViolation(violation) && (compliance == null || compliance.sections().contains(__uriViolations.get(violation))))
return violation.getMessage();
}
return null;
}
}
136 changes: 105 additions & 31 deletions jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,45 @@
/**
* Http URI.
* Parse an HTTP URI from a string or byte array. Given a URI
* <code>http://user@host:port/path/info;param?query#fragment</code>
* this class will split it into the following undecoded optional elements:<ul>
* <code>http://user@host:port/path;param1/%2e/info;param2?query#fragment</code>
* this class will split it into the following optional elements:<ul>
* <li>{@link #getScheme()} - http:</li>
* <li>{@link #getAuthority()} - //name@host:port</li>
* <li>{@link #getHost()} - host</li>
* <li>{@link #getPort()} - port</li>
* <li>{@link #getPath()} - /path/info</li>
* <li>{@link #getParam()} - param</li>
* <li>{@link #getPath()} - /path;param1/%2e/info;param2</li>
* <li>{@link #getDecodedPath()} - /path/info</li>
* <li>{@link #getParam()} - param2</li>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs an explanation of why we don't return param1.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

* <li>{@link #getQuery()} - query</li>
* <li>{@link #getFragment()} - fragment</li>
* </ul>
*
* <p>Any parameters will be returned from {@link #getPath()}, but are excluded from the
* return value of {@link #getDecodedPath()}. If there are multiple parameters, the
* {@link #getParam()} method returns only the last one.
*/
* <p>The path part of the URI is provided in both raw form ({@link #getPath()}) and
* decoded form ({@link #getDecodedPath}), which has: path parameters removed,
* percent encoded characters expanded and relative segments resolved. This approach
* is somewhat contrary to <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a>
* which no longer defines path parameters (removed after
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>) and specifies
* that relative segment normalization should take place before percent encoded character
* expansion. A literal interpretation of the RFC can result in URI paths with ambiguities
* when viewed as strings. For example, a URI of {@code /foo%2f..%2fbar} is technically a single
* segment of "/foo/../bar", but could easily be misinterpreted as 3 segments resolving to "/bar"
* by a file system.
* </p>
* <p>
* Thus this class avoid and/or detects such ambiguities. Furthermore, by decoding characters and
* removing parameters before relative path normalization, ambiguous paths will be resolved in such
* a way to be non-standard-but-non-ambiguous to down stream interpretation of the decoded path string.
* The violations are recorded and available by API such as {@link #hasViolation(Violation)} so that requests
* containing them may be rejected in case the non-standard-but-non-ambiguous interpretations
* are not satisfactory for a given compliance configuration. Implementations that wish to
* process ambiguous URI paths must configure the compliance modes to accept them and then perform
* their own decoding of {@link #getPath()}.
* </p>
* <p>
* If there are multiple path parameters, only the last one is returned by {@link #getParam()}.
* </p>
**/
public class HttpURI
{
private enum State
Expand All @@ -69,28 +92,49 @@ private enum State
ASTERISK
}

enum Violation
/**
* Violations of safe URI interpretations
*/
public enum Violation
{
SEGMENT,
SEPARATOR,
PARAM,
ENCODING,
EMPTY,
UTF16
/**
* Ambiguous path segments e.g. <code>/foo/%2E%2E/bar</code>
*/
SEGMENT("Ambiguous path segments"),
/**
* Ambiguous path separator within a URI segment e.g. <code>/foo%2Fbar</code>
*/
SEPARATOR("Ambiguous path separator"),
/**
* Ambiguous path parameters within a URI segment e.g. <code>/foo/..;/bar</code>
*/
PARAM("Ambiguous path parameters"),
/**
* Ambiguous double encoding within a URI segment e.g. <code>/%2557EB-INF</code>
*/
ENCODING("Ambiguous double encoding"),
/**
* Ambiguous empty segments e.g. <code>/foo//bar</code>
*/
EMPTY("Ambiguous empty segments"),
/**
* Non standard UTF-16 encoding eg <code>/foo%u2192bar</code>.
*/
UTF16("Non standard UTF-16 encoding");

private final String _message;

Violation(String message)
{
_message = message;
}

String getMessage()
{
return _message;
}
}

/**
* The concept of URI path parameters was originally specified in
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>, but that was
* obsoleted by
* <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a> which removed
* a normative definition of path parameters. Specifically it excluded them from the
* <a href="https://tools.ietf.org/html/rfc3986#section-5.2.4">Remove Dot Segments</a>
* algorithm. This results in some ambiguity as dot segments can result from later
* parameter removal or % encoding expansion, that are not removed from the URI
* by {@link URIUtil#canonicalPath(String)}. Thus this class flags such ambiguous
* path segments, so that they may be rejected by the server if so configured.
*/
private static final Trie<Boolean> __ambiguousSegments = new ArrayTrie<>();

static
Expand Down Expand Up @@ -179,6 +223,22 @@ public HttpURI(HttpURI uri)
_emptySegment = false;
}

public HttpURI(HttpURI schemeHostPort, HttpURI uri)
{
_scheme = schemeHostPort._scheme;
_user = schemeHostPort._user;
_host = schemeHostPort._host;
_port = schemeHostPort._port;
_path = uri._path;
_param = uri._param;
_query = uri._query;
_fragment = uri._fragment;
_uri = uri._uri;
_decodedPath = uri._decodedPath;
_violations.addAll(uri._violations);
_emptySegment = false;
}

public HttpURI(String uri)
{
_port = -1;
Expand Down Expand Up @@ -506,6 +566,8 @@ else if (c == '/')
{
switch (encodedValue)
{
case 0:
throw new IllegalArgumentException("Illegal character in path");
case '/':
_violations.add(Violation.SEPARATOR);
break;
Expand Down Expand Up @@ -677,10 +739,12 @@ else if (c == '/')
}
else if (_path != null)
{
String canonical = URIUtil.canonicalPath(_path);
if (canonical == null)
throw new BadMessageException("Bad URI");
_decodedPath = URIUtil.decodePath(canonical);
// The RFC requires this to be canonical before decoding, but this can leave dot segments and dot dot segments
// which are not canonicalized and could be used in an attempt to bypass security checks.
String decodeNonCanonical = URIUtil.decodePath(_path);
_decodedPath = URIUtil.canonicalPath(decodeNonCanonical);
if (_decodedPath == null)
throw new IllegalArgumentException("Bad URI");
}
}

Expand Down Expand Up @@ -794,6 +858,11 @@ public boolean hasViolations()
return !_violations.isEmpty();
}

public boolean hasViolation(Violation violation)
{
return _violations.contains(violation);
}

/**
* @return True if the URI encodes UTF-16 characters with '%u'.
*/
Expand Down Expand Up @@ -839,6 +908,11 @@ public String getDecodedPath()
return _decodedPath;
}

/**
* Get a URI path parameter. Multiple and in segment parameters are ignored and only
* the last trailing parameter is returned.
* @return The last path parameter or null
*/
public String getParam()
{
return _param;
Expand Down
Loading