Implement CharSequences#parseLong(CharSequence) (#1469)

Motivation: Our `HttpHeaders` API uses `CharSequence`. In order for users to parse the value (like, `Content-Length`) they have to convert `CharSequence` to `String` for `Long.parseLong(String)` API. This approach does an extra copy of data from underlying `Buffer`. We can avoid it. Modifications: - Add `CharSequences#parseLong(CharSequence)` API that does not force `CharSequence#toString()` conversion; - Add tests to validate new API; - Use the new API in all places when we parse an `AsciiBuffer` or `AsciiString`; Result: Less allocations of `String`.
apple · Apr 2, 2021 · fc33ee5 · fc33ee5
1 parent ff0ec96
commit fc33ee5
Show file tree

Hide file tree

Showing 7 changed files with 328 additions and 7 deletions.
diff --git a/...lk-benchmarks/src/jmh/java/io/servicetalk/buffer/api/CharSequencesParseLongBenchmark.java b/...lk-benchmarks/src/jmh/java/io/servicetalk/buffer/api/CharSequencesParseLongBenchmark.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2021 Apple Inc. and the ServiceTalk project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.servicetalk.buffer.api;
+
+import io.netty.util.AsciiString;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.nio.charset.StandardCharsets;
+
+import static io.servicetalk.buffer.api.CharSequences.newAsciiString;
+
+/*
+ * This benchmark compares CharSequences#parseLong(CharSequence) and Long.parseLong(String):
+ *
+ * Benchmark                              (value)   Mode  Cnt         Score         Error  Units
+ * javaParseLongString       -9223372036854775808  thrpt    5  24524373.658 ±  490417.495  ops/s
+ *   stParseLongString       -9223372036854775808  thrpt    5  19168467.144 ±  594280.502  ops/s
+ *
+ * javaParseLongString        9223372036854775807  thrpt    5  15573374.506 ± 1049847.936  ops/s
+ *   stParseLongString        9223372036854775807  thrpt    5  16697590.692 ±  231851.871  ops/s
+ *
+ * javaParseLongAsciiBuffer  -9223372036854775808  thrpt    5  10066121.790 ±  117298.619  ops/s
+ *   stParseLongAsciiBuffer  -9223372036854775808  thrpt    5  18155698.684 ±  436706.324  ops/s
+ *
+ * javaParseLongAsciiBuffer   9223372036854775807  thrpt    5  10730908.955 ±  116656.679  ops/s
+ *   stParseLongAsciiBuffer   9223372036854775807  thrpt    5  19615079.368 ±  459852.132  ops/s
+ *
+ * javaParseLongAsciiString  -9223372036854775808  thrpt    5  17546166.613 ±  444219.547  ops/s
+ *   stParseLongAsciiString  -9223372036854775808  thrpt    5  19169592.065 ±  499213.146  ops/s
+ *
+ * javaParseLongAsciiString   9223372036854775807  thrpt    5  22611841.803 ±  503643.380  ops/s
+ *   stParseLongAsciiString   9223372036854775807  thrpt    5  21140372.163 ± 2921605.423  ops/s
+ *
+ *
+ *
+ * javaParseLongString                      -8192  thrpt    5  69974528.501 ± 6167380.442  ops/s
+ *   stParseLongString                      -8192  thrpt    5  73735070.747 ± 2968101.803  ops/s
+ *
+ * javaParseLongString                       8192  thrpt    5  70138556.799 ±  918507.526  ops/s
+ *   stParseLongString                       8192  thrpt    5  66549636.755 ± 1023126.881  ops/s
+ *
+ * javaParseLongAsciiBuffer                 -8192  thrpt    5  15418127.631 ±  271577.020  ops/s
+ *   stParseLongAsciiBuffer                 -8192  thrpt    5  58372951.121 ±  920176.976  ops/s
+ *
+ * javaParseLongAsciiBuffer                  8192  thrpt    5  15203126.170 ±  289559.904  ops/s
+ *   stParseLongAsciiBuffer                  8192  thrpt    5  56709314.826 ±  813985.642  ops/s
+ *
+ * javaParseLongAsciiString                 -8192  thrpt    5  70984579.239 ± 1614728.648  ops/s
+ *   stParseLongAsciiString                 -8192  thrpt    5  68602480.185 ± 1052183.360  ops/s
+ *
+ * javaParseLongAsciiString                  8192  thrpt    5  75324292.593 ±  592329.213  ops/s
+ *   stParseLongAsciiString                  8192  thrpt    5  80748587.669 ± 1076241.647  ops/s
+ */
+@Fork(value = 1)
+@State(Scope.Benchmark)
+@Warmup(iterations = 5, time = 3)
+@Measurement(iterations = 5, time = 3)
+@BenchmarkMode(Mode.Throughput)
+public class CharSequencesParseLongBenchmark {
+
+    @Param({"-9223372036854775808", "9223372036854775807", "-8192", "8192"})
+    private String value;
+
+    private CharSequence asciiBuffer;
+    private CharSequence asciiString;
+
+    @Setup(Level.Trial)
+    public void setup() {
+        asciiBuffer = newAsciiString(value);
+        asciiString = new AsciiString(value.getBytes(StandardCharsets.US_ASCII));
+    }
+
+    @Benchmark
+    public long javaParseLongString() {
+        return Long.parseLong(value);
+    }
+
+    @Benchmark
+    public long stParseLongString() {
+        return CharSequences.parseLong(value);
+    }
+
+    @Benchmark
+    public long javaParseLongAsciiBuffer() {
+        return Long.parseLong(asciiBuffer.toString());
+    }
+
+    @Benchmark
+    public long stParseLongAsciiBuffer() {
+        return CharSequences.parseLong(asciiBuffer);
+    }
+
+    @Benchmark
+    public long javaParseLongAsciiString() {
+        return Long.parseLong(asciiString.toString());
+    }
+
+    @Benchmark
+    public long stParseLongAsciiString() {
+        return CharSequences.parseLong(asciiString);
+    }
+}
diff --git a/servicetalk-buffer-api/build.gradle b/servicetalk-buffer-api/build.gradle
@@ -22,6 +22,8 @@ dependencies {
   implementation "com.google.code.findbugs:jsr305:$jsr305Version"
 
   testImplementation "junit:junit:$junitVersion"
+  testImplementation "org.junit.jupiter:junit-jupiter-api:$junit5Version"
+  testImplementation "org.junit.jupiter:junit-jupiter-params:$junit5Version"
   testImplementation "org.hamcrest:hamcrest-library:$hamcrestVersion"
   testImplementation "org.mockito:mockito-core:$mockitoCoreVersion"
   testFixturesImplementation "org.hamcrest:hamcrest-library:$hamcrestVersion"

diff --git a/servicetalk-buffer-api/src/main/java/io/servicetalk/buffer/api/CharSequences.java b/servicetalk-buffer-api/src/main/java/io/servicetalk/buffer/api/CharSequences.java
@@ -13,6 +13,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/*
+ * Copyright 2014 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
 package io.servicetalk.buffer.api;
 
 import java.util.ArrayList;
@@ -23,6 +38,7 @@
 import static io.servicetalk.buffer.api.AsciiBuffer.hashCodeAscii;
 import static io.servicetalk.buffer.api.ReadOnlyBufferAllocators.DEFAULT_RO_ALLOCATOR;
 import static java.lang.Character.toUpperCase;
+import static java.nio.charset.StandardCharsets.US_ASCII;
 import static java.util.Collections.emptyList;
 import static java.util.Objects.requireNonNull;
 
@@ -32,6 +48,9 @@
  */
 public final class CharSequences {
 
+    private static final int RADIX_10 = 10;
+    private static final long PARSE_LONG_MIN = Long.MIN_VALUE / RADIX_10;
+
     private CharSequences() {
         // No instances
     }
@@ -420,4 +439,125 @@ static boolean contentEqualsIgnoreCaseUnknownTypes(final CharSequence a, final C
         }
         return true;
     }
+
+    /**
+     * Parses the {@link CharSequence} argument as a signed decimal {@code long}.
+     *
+     * <p> This is the equivalent of {@link Long#parseLong(String)} that does not require to
+     * {@link CharSequence#toString()} conversion.
+     *
+     * @param cs a {@code CharSequence} containing the {@code long} value to be parsed
+     * @return {code long} representation of the passed {@link CharSequence}
+     * @throws NumberFormatException if the passed {@link CharSequence} cannot be parsed into {@code long}
+     */
+    public static long parseLong(final CharSequence cs) throws NumberFormatException {
+        final int length = cs.length();
+        if (length <= 0) {
+            throw new NumberFormatException("Illegal length of the CharSequence: " + length + " (expected > 0)");
+        }
+
+        if (isAsciiString(cs)) {
+            return parseLong(((AsciiBuffer) cs).unwrap());
+        }
+
+        return parseLong(cs, length);
+    }
+
+    @SuppressWarnings("SameParameterValue")
+    private static long parseLong(final CharSequence cs, final int length) {
+        int i = 0;
+        final char firstCh = cs.charAt(i);
+        final boolean negative = firstCh == '-';
+        if ((negative || firstCh == '+') && ++i == length) {
+            throw illegalInput(cs);
+        }
+
+        long result = 0;
+        while (i < length) {
+            final int digit = Character.digit(cs.charAt(i++), RADIX_10);
+            if (digit < 0) {
+                throw illegalInput(cs);
+            }
+            if (PARSE_LONG_MIN > result) {
+                throw illegalInput(cs);
+            }
+            long next = result * RADIX_10 - digit;
+            if (next > result) {
+                throw illegalInput(cs);
+            }
+            result = next;
+        }
+        if (!negative) {
+            result = -result;
+            if (result < 0) {
+                throw illegalInput(cs);
+            }
+        }
+        return result;
+    }
+
+    private static long parseLong(final Buffer buffer) {
+        final ParseLongByteProcessor bp = new ParseLongByteProcessor(buffer);
+        buffer.forEachByte(bp);
+        return bp.result();
+    }
+
+    private static final class ParseLongByteProcessor implements ByteProcessor {
+
+        private final Buffer buffer;
+        private long result;
+        private boolean negative;
+        private boolean sign;
+
+        ParseLongByteProcessor(final Buffer buffer) {
+            this.buffer = buffer;
+        }
+
+        @Override
+        public boolean process(final byte value) {
+            if (!sign) {
+                sign = true;
+                negative = value == '-';
+                if ((negative || value == '+')) {
+                    if (buffer.readableBytes() == 1) {
+                        throw illegalInput(buffer);
+                    } else {
+                        return true;
+                    }
+                }
+            }
+
+            final int digit = value - '0';
+            if (digit < 0 || digit > 9) {
+                throw illegalInput(buffer);
+            }
+            if (PARSE_LONG_MIN > result) {
+                throw illegalInput(buffer);
+            }
+            long next = result * RADIX_10 - digit;
+            if (next > result) {
+                throw illegalInput(buffer);
+            }
+            result = next;
+            return true;
+        }
+
+        long result() {
+            if (!negative) {
+                result = -result;
+                if (result < 0) {
+                    throw illegalInput(buffer);
+                }
+            }
+            return result;
+        }
+    }
+
+    private static NumberFormatException illegalInput(final CharSequence cs) {
+        return new NumberFormatException("Illegal input: \"" + cs + "\"");
+    }
+
+    private static NumberFormatException illegalInput(final Buffer buffer) {
+        return new NumberFormatException("Illegal input: \"" + buffer.toString(US_ASCII) + "\"");
+    }
 }
diff --git a/servicetalk-buffer-api/src/test/java/io/servicetalk/buffer/api/CharSequencesTest.java b/servicetalk-buffer-api/src/test/java/io/servicetalk/buffer/api/CharSequencesTest.java
@@ -15,14 +15,21 @@
  */
 package io.servicetalk.buffer.api;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.util.function.Function;
 
+import static io.servicetalk.buffer.api.CharSequences.newAsciiString;
 import static io.servicetalk.buffer.api.CharSequences.split;
+import static io.servicetalk.buffer.api.ReadOnlyBufferAllocators.DEFAULT_RO_ALLOCATOR;
 import static java.util.function.Function.identity;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.is;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
 public class CharSequencesTest {
 
@@ -152,4 +159,50 @@ public void splitAsciiNoTrim() {
     public void splitAsciiWithTrim() {
         splitWithTrim(CharSequences::newAsciiString);
     }
+
+    @ParameterizedTest
+    @ValueSource(longs = { Long.MIN_VALUE, Long.MIN_VALUE + 1,
+            -101, -100, -99, -11, -10, -9, -1, 0, 1, 9, 10, 11, 99, 100, 101,
+            Long.MAX_VALUE - 1, Long.MAX_VALUE })
+    public void parseLong(final long value) {
+        final String strValue = String.valueOf(value);
+        assertThat("Unexpected result for String representation", CharSequences.parseLong(strValue), is(value));
+        assertThat("Unexpected result for AsciiBuffer representation",
+                CharSequences.parseLong(newAsciiString(strValue)), is(value));
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = { "-0", "+0", "+1", "+10", "000" })
+    public void parseLongSigned(final String value) {
+        assertThat("Unexpected result for String representation",
+                CharSequences.parseLong(value), is(Long.parseLong(value)));
+        assertThat("Unexpected result for AsciiBuffer representation",
+                CharSequences.parseLong(newAsciiString(value)), is(Long.parseLong(value)));
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = { "", "-", "+", "a", "0+", "0-", "--0", "++0", "0a0" })
+    public void parseLongFailure(final String value) {
+        assertThrows(NumberFormatException.class, () -> CharSequences.parseLong(value),
+                "Unexpected result for String representation");
+        assertThrows(NumberFormatException.class, () -> CharSequences.parseLong(newAsciiString(value)),
+                "Unexpected result for AsciiBuffer representation");
+    }
+
+    @Test
+    public void parseLongFromSubSequence() {
+        String value = "text42text";
+        assertThat("Unexpected result for String representation",
+                CharSequences.parseLong(value.subSequence(4, 6)), is(42L));
+        assertThat("Unexpected result for AsciiBuffer representation",
+                CharSequences.parseLong(newAsciiString(value).subSequence(4, 6)), is(42L));
+    }
+
+    @Test
+    @Disabled("ReadOnlyByteBuffer#slice() does not account for the slice offset")
+    public void parseLongFromSlice() {
+        Buffer buffer = DEFAULT_RO_ALLOCATOR.fromAscii("text42text");
+        assertThat("Unexpected result for AsciiBuffer representation",
+                CharSequences.parseLong(newAsciiString(buffer.slice(4, 2))), is(42L));
+    }
 }