H1SpecExceptions add option to allow LF without proceeding CR (#1475)

Motivation: The HTTP/1.x spec allows for optional support to accept LF without proceeding CR as a valid separator in some scenarios [1]. Some older implementations rely upon this behavior and interoperability would be improved by allowing opt-in support. [1] https://tools.ietf.org/html/rfc7230#section-3.5 Although the line terminator for the start-line and header fields is the sequence CRLF, a recipient MAY recognize a single LF as a line terminator and ignore any preceding CR. Modifications: - Update HttpObjectDecoder to support LF without CR Result: H1SpecExceptions allows for opt-in support for LF without proceeding CR.
apple · Apr 10, 2021 · cf51335 · cf51335
1 parent da4be45
commit cf51335
Show file tree

Hide file tree

Showing 10 changed files with 680 additions and 283 deletions.
diff --git a/servicetalk-http-netty/src/main/java/io/servicetalk/http/netty/H1SpecExceptions.java b/servicetalk-http-netty/src/main/java/io/servicetalk/http/netty/H1SpecExceptions.java
@@ -21,9 +21,12 @@
 public final class H1SpecExceptions {
 
     private final boolean allowPrematureClosureBeforePayloadBody;
+    private final boolean allowLFWithoutCR;
 
-    private H1SpecExceptions(final boolean allowPrematureClosureBeforePayloadBody) {
+    private H1SpecExceptions(final boolean allowPrematureClosureBeforePayloadBody,
+                             final boolean allowLFWithoutCR) {
         this.allowPrematureClosureBeforePayloadBody = allowPrematureClosureBeforePayloadBody;
+        this.allowLFWithoutCR = allowLFWithoutCR;
     }
 
     /**
@@ -37,21 +40,64 @@ public boolean allowPrematureClosureBeforePayloadBody() {
         return allowPrematureClosureBeforePayloadBody;
     }
 
+    /**
+     * Allow {@code LF} without a proceeding {@code CR} as described in
+     * <a href="https://tools.ietf.org/html/rfc7230#section-3.5">HTTP/1.x Message Parsing Robustness</a>:
+     * <pre>
+     *   Although the line terminator for the start-line and header fields is
+     *   the sequence CRLF, a recipient MAY recognize a single LF as a line
+     *   terminator and ignore any preceding CR.
+     * </pre>
+     * @return {@code true} to allow {@code LF} without a proceeding {@code CR}.
+     */
+    public boolean allowLFWithoutCR() {
+        return allowLFWithoutCR;
+    }
+
     /**
      * Builder for {@link H1SpecExceptions}.
      */
     public static final class Builder {
 
         private boolean allowPrematureClosureBeforePayloadBody;
+        private boolean allowLFWithoutCR;
 
         /**
          * Allows interpreting connection closures as the end of HTTP/1.1 messages if the receiver did not receive any
          * part of the payload body before the connection closure.
-         *
+         * @deprecated Use {@link #allowPrematureClosureBeforePayloadBody(boolean)}.
          * @return {@code this}
          */
+        @Deprecated
         public Builder allowPrematureClosureBeforePayloadBody() {
-            this.allowPrematureClosureBeforePayloadBody = true;
+            return allowPrematureClosureBeforePayloadBody(true);
+        }
+
+        /**
+         * Allows interpreting connection closures as the end of HTTP/1.1 messages if the receiver did not receive any
+         * part of the payload body before the connection closure.
+         * @param allow {@code true} if the receiver should interpret connection closures as the end of HTTP/1.1
+         * messages if it did not receive any part of the payload body before the connection closure.
+         * @return {@code this}
+         */
+        public Builder allowPrematureClosureBeforePayloadBody(boolean allow) {
+            this.allowPrematureClosureBeforePayloadBody = allow;
+            return this;
+        }
+
+        /**
+         * Allow {@code LF} without a proceeding {@code CR} as described in
+         * <a href="https://tools.ietf.org/html/rfc7230#section-3.5">HTTP/1.x Message Parsing Robustness</a>:
+         * <pre>
+         *   Although the line terminator for the start-line and header fields is
+         *   the sequence CRLF, a recipient MAY recognize a single LF as a line
+         *   terminator and ignore any preceding CR.
+         * </pre>
+         * @param allow {@code true} to allow {@code LF} without a proceeding {@code CR}.
+         * @return {@code this}
+         */
+        public Builder allowLFWithoutCR(boolean allow) {
+            this.allowLFWithoutCR = allow;
             return this;
         }
 
@@ -61,7 +107,7 @@ public Builder allowPrematureClosureBeforePayloadBody() {
          * @return a new {@link H1SpecExceptions}
          */
         public H1SpecExceptions build() {
-            return new H1SpecExceptions(allowPrematureClosureBeforePayloadBody);
+            return new H1SpecExceptions(allowPrematureClosureBeforePayloadBody, allowLFWithoutCR);
         }
     }
 }
diff --git a/...talk-http-netty/src/main/java/io/servicetalk/http/netty/HttpClientChannelInitializer.java b/...talk-http-netty/src/main/java/io/servicetalk/http/netty/HttpClientChannelInitializer.java
@@ -47,7 +47,8 @@ final class HttpClientChannelInitializer implements ChannelInitializer {
             final ChannelPipeline pipeline = channel.pipeline();
             pipeline.addLast(new HttpResponseDecoder(methodQueue, alloc, config.headersFactory(),
                     config.maxStartLineLength(), config.maxHeaderFieldLength(),
-                    config.specExceptions().allowPrematureClosureBeforePayloadBody(), closeHandler));
+                    config.specExceptions().allowPrematureClosureBeforePayloadBody(),
+                    config.specExceptions().allowLFWithoutCR(), closeHandler));
             pipeline.addLast(new HttpRequestEncoder(methodQueue,
                     config.headersEncodedSizeEstimate(), config.trailersEncodedSizeEstimate(), closeHandler));
         });

diff --git a/servicetalk-http-netty/src/main/java/io/servicetalk/http/netty/HttpObjectDecoder.java b/servicetalk-http-netty/src/main/java/io/servicetalk/http/netty/HttpObjectDecoder.java
@@ -66,6 +66,7 @@
 import static io.servicetalk.buffer.api.CharSequences.emptyAsciiString;
 import static io.servicetalk.buffer.api.CharSequences.newAsciiString;
 import static io.servicetalk.buffer.netty.BufferUtils.newBufferFrom;
+import static io.servicetalk.concurrent.internal.FlowControlUtils.addWithOverflowProtection;
 import static io.servicetalk.http.api.HeaderUtils.isTransferEncodingChunked;
 import static io.servicetalk.http.api.HttpHeaderNames.CONTENT_LENGTH;
 import static io.servicetalk.http.api.HttpHeaderNames.SEC_WEBSOCKET_KEY1;
@@ -132,6 +133,15 @@ abstract class HttpObjectDecoder<T extends HttpMetaData> extends ByteToMessageDe
     private final HttpHeadersFactory headersFactory;
     private final CloseHandler closeHandler;
     private final boolean allowPrematureClosureBeforePayloadBody;
+    /**
+     * <a href="https://tools.ietf.org/html/rfc7230#section-3.5>HTTP/1.x Message Parsing Robustness</a>
+     * <pre>
+     *   Although the line terminator for the start-line and header fields is
+     *   the sequence CRLF, a recipient MAY recognize a single LF as a line
+     *   terminator and ignore any preceding CR.
+     * </pre>
+     */
+    private final boolean allowLFWithoutCR;
     @Nullable
     private T message;
     @Nullable
@@ -166,9 +176,10 @@ private enum State {
     /**
      * Creates a new instance with the specified parameters.
      */
-    protected HttpObjectDecoder(final ByteBufAllocator alloc, final HttpHeadersFactory headersFactory,
-                                final int maxStartLineLength, final int maxHeaderFieldLength,
-                                final boolean allowPrematureClosureBeforePayloadBody, final CloseHandler closeHandler) {
+    HttpObjectDecoder(final ByteBufAllocator alloc, final HttpHeadersFactory headersFactory,
+                      final int maxStartLineLength, final int maxHeaderFieldLength,
+                      final boolean allowPrematureClosureBeforePayloadBody, final boolean allowLFWithoutCR,
+                      final CloseHandler closeHandler) {
         super(alloc);
         this.closeHandler = requireNonNull(closeHandler);
         if (maxStartLineLength <= 0) {
@@ -181,6 +192,7 @@ protected HttpObjectDecoder(final ByteBufAllocator alloc, final HttpHeadersFacto
         this.maxStartLineLength = maxStartLineLength;
         this.maxHeaderFieldLength = maxHeaderFieldLength;
         this.allowPrematureClosureBeforePayloadBody = allowPrematureClosureBeforePayloadBody;
+        this.allowLFWithoutCR = allowLFWithoutCR;
     }
 
     final HttpHeadersFactory headersFactory() {
@@ -231,8 +243,8 @@ protected final void decode(final ChannelHandlerContext ctx, final ByteBuf buffe
                 currentState = State.READ_INITIAL;
             }
             case READ_INITIAL: {
-                final int lfIndex = findCRLF(buffer, maxStartLineLength);
-                if (lfIndex < 0) {
+                final long longLFIndex = findCRLF(buffer, maxStartLineLength, allowLFWithoutCR);
+                if (longLFIndex < 0) {
                     handlePartialInitialLine(ctx, buffer);
                     return;
                 }
@@ -242,7 +254,8 @@ protected final void decode(final ChannelHandlerContext ctx, final ByteBuf buffe
                 // request-line = method SP request-target SP HTTP-version CRLF
                 // https://tools.ietf.org/html/rfc7230#section-3.1.2
                 // status-line = HTTP-version SP status-code SP reason-phrase CRLF
-                final int nonControlIndex = lfIndex - 2;
+                final int lfIndex = crlfIndex(longLFIndex);
+                final int nonControlIndex = crlfBeforeIndex(longLFIndex);
                 final int aStart = buffer.readerIndex();    // We already skipped all preface control chars
                 // Look only for a WS, other checks will be done later by request/response decoder
                 final int aEnd = buffer.forEachByte(aStart + 1, nonControlIndex - aStart, FIND_WS);
@@ -375,10 +388,11 @@ protected final void decode(final ChannelHandlerContext ctx, final ByteBuf buffe
             // everything else after this point takes care of reading chunked content. basically, read chunk size,
             // read chunk, read and ignore the CRLF and repeat until 0
             case READ_CHUNK_SIZE: {
-                int lfIndex = findCRLF(buffer, MAX_HEX_CHARS_FOR_LONG);
-                if (lfIndex < 0) {
+                final long longLFIndex = findCRLF(buffer, MAX_HEX_CHARS_FOR_LONG, false);
+                if (longLFIndex < 0) {
                     return;
                 }
+                final int lfIndex = crlfIndex(longLFIndex);
                 long chunkSize = getChunkSize(buffer, lfIndex);
                 consumeCRLF(buffer, lfIndex);
                 this.chunkSize = chunkSize;
@@ -409,11 +423,11 @@ protected final void decode(final ChannelHandlerContext ctx, final ByteBuf buffe
             }
             case READ_CHUNK_DELIMITER: {
                 // Read the chunk delimiter
-                int lfIndex = findCRLF(buffer, CHUNK_DELIMETER_SIZE);
-                if (lfIndex < 0) {
+                final long longLFIndex = findCRLF(buffer, CHUNK_DELIMETER_SIZE, false);
+                if (longLFIndex < 0) {
                     return;
                 }
-                consumeCRLF(buffer, lfIndex);
+                consumeCRLF(buffer, crlfIndex(longLFIndex));
                 currentState = State.READ_CHUNK_SIZE;
                 break;
             }
@@ -592,8 +606,8 @@ private boolean skipControlCharacters(final ByteBuf buffer) {
         }
     }
 
-    private void parseHeaderLine(final HttpHeaders headers, final ByteBuf buffer, final int lfIndex)
-            throws DecoderException {
+    private void parseHeaderLine(final HttpHeaders headers, final ByteBuf buffer, final int lfIndex,
+                                 final int nonControlIndex) throws DecoderException {
         // https://tools.ietf.org/html/rfc7230#section-3.2
         // header-field   = field-name ":" OWS field-value OWS
         //
@@ -613,7 +627,6 @@ private void parseHeaderLine(final HttpHeaders headers, final ByteBuf buffer, fi
         //    security vulnerabilities in request routing and response handling.
         // Additional checks will be done by header validator
 
-        final int nonControlIndex = lfIndex - 2;
         final int nameStart = buffer.readerIndex();
         final int nameEnd = buffer.forEachByte(nameStart, nonControlIndex - nameStart + 1, FIND_COLON);
         if (nameEnd < 0) {
@@ -665,13 +678,13 @@ private static DecoderException invalidHeaderValue(final CharSequence name, fina
 
     @Nullable
     private State readHeaders(final ByteBuf buffer) {
-        int lfIndex = findCRLF(buffer, maxHeaderFieldLength);
-        if (lfIndex < 0) {
+        final long longLFIndex = findCRLF(buffer, maxHeaderFieldLength, allowLFWithoutCR);
+        if (longLFIndex < 0) {
             return null;
         }
         final T message = this.message;
         assert message != null;
-        if (!parseAllHeaders(buffer, message.headers(), lfIndex, maxHeaderFieldLength)) {
+        if (!parseAllHeaders(buffer, message.headers(), longLFIndex)) {
             return null;
         }
 
@@ -705,20 +718,20 @@ private long contentLength() {
 
     @Nullable
     private HttpHeaders readTrailingHeaders(final ByteBuf buffer) {
-        final int lfIndex = findCRLF(buffer, maxHeaderFieldLength);
-        if (lfIndex < 0) {
+        final long longLFIndex = findCRLF(buffer, maxHeaderFieldLength, allowLFWithoutCR);
+        if (longLFIndex < 0) {
             return null;
         }
-        if (lfIndex - 2 > buffer.readerIndex()) {
+        if (crlfBeforeIndex(longLFIndex) > buffer.readerIndex()) {
             HttpHeaders trailer = this.trailer;
             if (trailer == null) {
                 trailer = this.trailer = headersFactory.newTrailers();
             }
 
-            return parseAllHeaders(buffer, trailer, lfIndex, maxHeaderFieldLength) ? trailer : null;
+            return parseAllHeaders(buffer, trailer, longLFIndex) ? trailer : null;
         }
 
-        consumeCRLF(buffer, lfIndex);
+        consumeCRLF(buffer, crlfIndex(longLFIndex));
         // The RFC says the trailers are optional [1] so use an empty trailers instance from the headers factory.
         // [1] https://tools.ietf.org/html/rfc7230.html#section-4.1
         return trailer != null ? trailer : headersFactory.newEmptyTrailers();
@@ -729,29 +742,23 @@ private HttpHeaders readTrailingHeaders(final ByteBuf buffer) {
      *
      * @param buffer source of the headers
      * @param headers destination for parsed headers
-     * @param lfIndex the end of the current line
-     * @param maxHeaderFieldLength limit to the length of the headers
+     * @param longLFIndex result of {@link #findCRLF(ByteBuf, int, int, int, boolean)}
      * @return true if complete headers were processed otherwise false indicates incomplete parsing or error.
      */
-    private boolean parseAllHeaders(final ByteBuf buffer, final HttpHeaders headers, int lfIndex,
-                                    final int maxHeaderFieldLength) {
+    private boolean parseAllHeaders(final ByteBuf buffer, final HttpHeaders headers, long longLFIndex) {
         for (;;) {
-            if (lfIndex - 1 == buffer.readerIndex()) {
+            final int lfIndex = crlfIndex(longLFIndex);
+            final int nonControlIndex = crlfBeforeIndex(longLFIndex);
+            if (nonControlIndex < buffer.readerIndex()) {
                 consumeCRLF(buffer, lfIndex);
                 return true;
             }
-            final int nextLFIndex = findCRLF(buffer, lfIndex + 1, maxHeaderFieldLength, parsingLine);
-            parseHeaderLine(headers, buffer, lfIndex);
-            if (nextLFIndex < 0) {
+            longLFIndex = findCRLF(buffer, lfIndex + 1, maxHeaderFieldLength, parsingLine, allowLFWithoutCR);
+            parseHeaderLine(headers, buffer, lfIndex, nonControlIndex);
+            if (longLFIndex < 0) {
                 return false;
             }
             ++parsingLine;
-            if (nextLFIndex - 2 == lfIndex) {
-                // CRLF followed by CRLF, we are done.
-                consumeCRLF(buffer, nextLFIndex);
-                return true;
-            }
-            lfIndex = nextLFIndex;
         }
     }
 
@@ -795,16 +802,18 @@ private void consumeCRLF(final ByteBuf buffer, final int lfIndex) {
         }
     }
 
-    private int findCRLF(final ByteBuf buffer, final int maxLineSize) {
+    private long findCRLF(final ByteBuf buffer, final int maxLineSize, final boolean allowLFWithoutCR) {
         if (cumulationIndex < 0) {
             cumulationIndex = buffer.readerIndex();
         }
-        final int lfIndex = findCRLF(buffer, cumulationIndex, maxLineSize, parsingLine);
-        cumulationIndex = lfIndex < 0 ? min(buffer.writerIndex(), cumulationIndex + maxLineSize) : lfIndex;
-        if (lfIndex >= 0) {
+        final long longLFIndex = findCRLF(buffer, cumulationIndex, maxLineSize, parsingLine, allowLFWithoutCR);
+        if (longLFIndex < 0) {
+            cumulationIndex = min(buffer.writerIndex(), cumulationIndex + maxLineSize);
+        } else {
+            cumulationIndex = crlfIndex(longLFIndex);
             ++parsingLine;
         }
-        return lfIndex;
+        return longLFIndex;
     }
 
     /**
@@ -815,26 +824,37 @@ private int findCRLF(final ByteBuf buffer, final int maxLineSize) {
      * @param startIndex initial scanning index
      * @param maxLineSize maximum length of a line
      * @param parsingLine line count for error messages.
-     * @return the offset of the CRLF or < 0 if no CRLF found
+     * @param allowLFWithoutCR allow a LF without a proceeding CR to signify "end of line".
+     * @return
+     * <ul>
+     *     <li>{@code <0} if no [CR]?LF found</li>
+     *     <li>Use {@link #crlfIndex(long)} to extract the offset of the [CR]?LF and {@link #crlfBeforeIndex(long)}
+     *     to get the index immediately preceding the [CR]?LF</li>
+     * </ul>
      * @throws DecoderException if no LF or found in buffer
      * @throws TooLongFrameException if the line exceeds the permitted maximum
      */
-    private static int findCRLF(final ByteBuf buffer, int startIndex, final int maxLineSize, final int parsingLine) {
-        final int maxToIndex = startIndex + maxLineSize;
+    private static long findCRLF(final ByteBuf buffer, int startIndex, final int maxLineSize, final int parsingLine,
+                                final boolean allowLFWithoutCR) {
+        final int maxToIndex = addWithOverflowProtection(startIndex, maxLineSize);
         for (;;) {
             final int toIndex = min(buffer.writerIndex(), maxToIndex);
             final int lfIndex = findLF(buffer, startIndex, toIndex);
+            final boolean foundCR;
             if (lfIndex == -1) {
                 if (toIndex - startIndex == maxLineSize) {
                     throw new DecoderException("Could not find CRLF (0x0d0a) within " + maxLineSize +
                             " bytes, while parsing line " + parsingLine);
                 }
                 return -2;
             } else if (lfIndex == buffer.readerIndex()) {
+                if (allowLFWithoutCR) {
+                    return ((long) lfIndex) << 32 | lfIndex;
+                }
                 buffer.skipBytes(1);
                 ++startIndex;
-            } else if (buffer.getByte(lfIndex - 1) == CR) {
-                return lfIndex;
+            } else if ((foundCR = buffer.getByte(lfIndex - 1) == CR) || allowLFWithoutCR) {
+                return foundCR ? ((long) lfIndex - 1) << 32 | lfIndex : ((long) lfIndex) << 32 | lfIndex;
             } else if (lfIndex != maxToIndex) {
                 throw new DecoderException("Found LF (0x0a) but no CR (0x0d) before, while parsing line " +
                         parsingLine);
@@ -845,6 +865,14 @@ private static int findCRLF(final ByteBuf buffer, int startIndex, final int maxL
         }
     }
 
+    private static int crlfIndex(long index) {
+        return (int) index;
+    }
+
+    private static int crlfBeforeIndex(long index) {
+        return ((int) (index >>> 32)) - 1;
+    }
+
     private static int findLF(final ByteBuf buffer, final int fromIndex, final int toIndex) {
         if (fromIndex >= toIndex) {
             return -1;

diff --git a/servicetalk-http-netty/src/main/java/io/servicetalk/http/netty/HttpRequestDecoder.java b/servicetalk-http-netty/src/main/java/io/servicetalk/http/netty/HttpRequestDecoder.java
@@ -54,15 +54,15 @@ final class HttpRequestDecoder extends HttpObjectDecoder<HttpRequestMetaData> {
                        final HttpHeadersFactory headersFactory, final int maxStartLineLength,
                        final int maxHeaderFieldLength) {
         this(methodQueue, alloc, headersFactory, maxStartLineLength, maxHeaderFieldLength,
-                false, UNSUPPORTED_PROTOCOL_CLOSE_HANDLER);
+                false, false, UNSUPPORTED_PROTOCOL_CLOSE_HANDLER);
     }
 
     HttpRequestDecoder(final Queue<HttpRequestMethod> methodQueue, final ByteBufAllocator alloc,
                        final HttpHeadersFactory headersFactory, final int maxStartLineLength,
                        final int maxHeaderFieldLength, final boolean allowPrematureClosureBeforePayloadBody,
-                       final CloseHandler closeHandler) {
+                       final boolean allowLFWithoutCR, final CloseHandler closeHandler) {
         super(alloc, headersFactory, maxStartLineLength, maxHeaderFieldLength, allowPrematureClosureBeforePayloadBody,
-                closeHandler);
+                allowLFWithoutCR, closeHandler);
         this.methodQueue = requireNonNull(methodQueue);
     }