Skip to content

Commit

Permalink
Implement CharSequences#parseLong(CharSequence) (#1469)
Browse files Browse the repository at this point in the history
Motivation:

Our `HttpHeaders` API uses `CharSequence`. In order for users to parse
the value (like, `Content-Length`) they have to convert `CharSequence`
to `String` for `Long.parseLong(String)` API. This approach does an extra
copy of data from underlying `Buffer`. We can avoid it.

Modifications:

- Add `CharSequences#parseLong(CharSequence)` API that does not force
`CharSequence#toString()` conversion;
- Add tests to validate new API;
- Use the new API in all places when we parse an `AsciiBuffer` or
`AsciiString`;

Result:

Less allocations of `String`.
  • Loading branch information
idelpivnitskiy authored Apr 2, 2021
1 parent ff0ec96 commit fc33ee5
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright © 2021 Apple Inc. and the ServiceTalk project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.servicetalk.buffer.api;

import io.netty.util.AsciiString;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.nio.charset.StandardCharsets;

import static io.servicetalk.buffer.api.CharSequences.newAsciiString;

/*
* This benchmark compares CharSequences#parseLong(CharSequence) and Long.parseLong(String):
*
* Benchmark (value) Mode Cnt Score Error Units
* javaParseLongString -9223372036854775808 thrpt 5 24524373.658 ± 490417.495 ops/s
* stParseLongString -9223372036854775808 thrpt 5 19168467.144 ± 594280.502 ops/s
*
* javaParseLongString 9223372036854775807 thrpt 5 15573374.506 ± 1049847.936 ops/s
* stParseLongString 9223372036854775807 thrpt 5 16697590.692 ± 231851.871 ops/s
*
* javaParseLongAsciiBuffer -9223372036854775808 thrpt 5 10066121.790 ± 117298.619 ops/s
* stParseLongAsciiBuffer -9223372036854775808 thrpt 5 18155698.684 ± 436706.324 ops/s
*
* javaParseLongAsciiBuffer 9223372036854775807 thrpt 5 10730908.955 ± 116656.679 ops/s
* stParseLongAsciiBuffer 9223372036854775807 thrpt 5 19615079.368 ± 459852.132 ops/s
*
* javaParseLongAsciiString -9223372036854775808 thrpt 5 17546166.613 ± 444219.547 ops/s
* stParseLongAsciiString -9223372036854775808 thrpt 5 19169592.065 ± 499213.146 ops/s
*
* javaParseLongAsciiString 9223372036854775807 thrpt 5 22611841.803 ± 503643.380 ops/s
* stParseLongAsciiString 9223372036854775807 thrpt 5 21140372.163 ± 2921605.423 ops/s
*
*
*
* javaParseLongString -8192 thrpt 5 69974528.501 ± 6167380.442 ops/s
* stParseLongString -8192 thrpt 5 73735070.747 ± 2968101.803 ops/s
*
* javaParseLongString 8192 thrpt 5 70138556.799 ± 918507.526 ops/s
* stParseLongString 8192 thrpt 5 66549636.755 ± 1023126.881 ops/s
*
* javaParseLongAsciiBuffer -8192 thrpt 5 15418127.631 ± 271577.020 ops/s
* stParseLongAsciiBuffer -8192 thrpt 5 58372951.121 ± 920176.976 ops/s
*
* javaParseLongAsciiBuffer 8192 thrpt 5 15203126.170 ± 289559.904 ops/s
* stParseLongAsciiBuffer 8192 thrpt 5 56709314.826 ± 813985.642 ops/s
*
* javaParseLongAsciiString -8192 thrpt 5 70984579.239 ± 1614728.648 ops/s
* stParseLongAsciiString -8192 thrpt 5 68602480.185 ± 1052183.360 ops/s
*
* javaParseLongAsciiString 8192 thrpt 5 75324292.593 ± 592329.213 ops/s
* stParseLongAsciiString 8192 thrpt 5 80748587.669 ± 1076241.647 ops/s
*/
@Fork(value = 1)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 3)
@Measurement(iterations = 5, time = 3)
@BenchmarkMode(Mode.Throughput)
public class CharSequencesParseLongBenchmark {

@Param({"-9223372036854775808", "9223372036854775807", "-8192", "8192"})
private String value;

private CharSequence asciiBuffer;
private CharSequence asciiString;

@Setup(Level.Trial)
public void setup() {
asciiBuffer = newAsciiString(value);
asciiString = new AsciiString(value.getBytes(StandardCharsets.US_ASCII));
}

@Benchmark
public long javaParseLongString() {
return Long.parseLong(value);
}

@Benchmark
public long stParseLongString() {
return CharSequences.parseLong(value);
}

@Benchmark
public long javaParseLongAsciiBuffer() {
return Long.parseLong(asciiBuffer.toString());
}

@Benchmark
public long stParseLongAsciiBuffer() {
return CharSequences.parseLong(asciiBuffer);
}

@Benchmark
public long javaParseLongAsciiString() {
return Long.parseLong(asciiString.toString());
}

@Benchmark
public long stParseLongAsciiString() {
return CharSequences.parseLong(asciiString);
}
}
2 changes: 2 additions & 0 deletions servicetalk-buffer-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ dependencies {
implementation "com.google.code.findbugs:jsr305:$jsr305Version"

testImplementation "junit:junit:$junitVersion"
testImplementation "org.junit.jupiter:junit-jupiter-api:$junit5Version"
testImplementation "org.junit.jupiter:junit-jupiter-params:$junit5Version"
testImplementation "org.hamcrest:hamcrest-library:$hamcrestVersion"
testImplementation "org.mockito:mockito-core:$mockitoCoreVersion"
testFixturesImplementation "org.hamcrest:hamcrest-library:$hamcrestVersion"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.servicetalk.buffer.api;

import java.util.ArrayList;
Expand All @@ -23,6 +38,7 @@
import static io.servicetalk.buffer.api.AsciiBuffer.hashCodeAscii;
import static io.servicetalk.buffer.api.ReadOnlyBufferAllocators.DEFAULT_RO_ALLOCATOR;
import static java.lang.Character.toUpperCase;
import static java.nio.charset.StandardCharsets.US_ASCII;
import static java.util.Collections.emptyList;
import static java.util.Objects.requireNonNull;

Expand All @@ -32,6 +48,9 @@
*/
public final class CharSequences {

private static final int RADIX_10 = 10;
private static final long PARSE_LONG_MIN = Long.MIN_VALUE / RADIX_10;

private CharSequences() {
// No instances
}
Expand Down Expand Up @@ -420,4 +439,125 @@ static boolean contentEqualsIgnoreCaseUnknownTypes(final CharSequence a, final C
}
return true;
}

/**
* Parses the {@link CharSequence} argument as a signed decimal {@code long}.
*
* <p> This is the equivalent of {@link Long#parseLong(String)} that does not require to
* {@link CharSequence#toString()} conversion.
*
* @param cs a {@code CharSequence} containing the {@code long} value to be parsed
* @return {code long} representation of the passed {@link CharSequence}
* @throws NumberFormatException if the passed {@link CharSequence} cannot be parsed into {@code long}
*/
public static long parseLong(final CharSequence cs) throws NumberFormatException {
final int length = cs.length();
if (length <= 0) {
throw new NumberFormatException("Illegal length of the CharSequence: " + length + " (expected > 0)");
}

if (isAsciiString(cs)) {
return parseLong(((AsciiBuffer) cs).unwrap());
}

return parseLong(cs, length);
}

@SuppressWarnings("SameParameterValue")
private static long parseLong(final CharSequence cs, final int length) {
int i = 0;
final char firstCh = cs.charAt(i);
final boolean negative = firstCh == '-';
if ((negative || firstCh == '+') && ++i == length) {
throw illegalInput(cs);
}

long result = 0;
while (i < length) {
final int digit = Character.digit(cs.charAt(i++), RADIX_10);
if (digit < 0) {
throw illegalInput(cs);
}
if (PARSE_LONG_MIN > result) {
throw illegalInput(cs);
}
long next = result * RADIX_10 - digit;
if (next > result) {
throw illegalInput(cs);
}
result = next;
}
if (!negative) {
result = -result;
if (result < 0) {
throw illegalInput(cs);
}
}
return result;
}

private static long parseLong(final Buffer buffer) {
final ParseLongByteProcessor bp = new ParseLongByteProcessor(buffer);
buffer.forEachByte(bp);
return bp.result();
}

private static final class ParseLongByteProcessor implements ByteProcessor {

private final Buffer buffer;
private long result;
private boolean negative;
private boolean sign;

ParseLongByteProcessor(final Buffer buffer) {
this.buffer = buffer;
}

@Override
public boolean process(final byte value) {
if (!sign) {
sign = true;
negative = value == '-';
if ((negative || value == '+')) {
if (buffer.readableBytes() == 1) {
throw illegalInput(buffer);
} else {
return true;
}
}
}

final int digit = value - '0';
if (digit < 0 || digit > 9) {
throw illegalInput(buffer);
}
if (PARSE_LONG_MIN > result) {
throw illegalInput(buffer);
}
long next = result * RADIX_10 - digit;
if (next > result) {
throw illegalInput(buffer);
}
result = next;
return true;
}

long result() {
if (!negative) {
result = -result;
if (result < 0) {
throw illegalInput(buffer);
}
}
return result;
}
}

private static NumberFormatException illegalInput(final CharSequence cs) {
return new NumberFormatException("Illegal input: \"" + cs + "\"");
}

private static NumberFormatException illegalInput(final Buffer buffer) {
return new NumberFormatException("Illegal input: \"" + buffer.toString(US_ASCII) + "\"");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,21 @@
*/
package io.servicetalk.buffer.api;

import org.junit.Test;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import java.util.function.Function;

import static io.servicetalk.buffer.api.CharSequences.newAsciiString;
import static io.servicetalk.buffer.api.CharSequences.split;
import static io.servicetalk.buffer.api.ReadOnlyBufferAllocators.DEFAULT_RO_ALLOCATOR;
import static java.util.function.Function.identity;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertThrows;

public class CharSequencesTest {

Expand Down Expand Up @@ -152,4 +159,50 @@ public void splitAsciiNoTrim() {
public void splitAsciiWithTrim() {
splitWithTrim(CharSequences::newAsciiString);
}

@ParameterizedTest
@ValueSource(longs = { Long.MIN_VALUE, Long.MIN_VALUE + 1,
-101, -100, -99, -11, -10, -9, -1, 0, 1, 9, 10, 11, 99, 100, 101,
Long.MAX_VALUE - 1, Long.MAX_VALUE })
public void parseLong(final long value) {
final String strValue = String.valueOf(value);
assertThat("Unexpected result for String representation", CharSequences.parseLong(strValue), is(value));
assertThat("Unexpected result for AsciiBuffer representation",
CharSequences.parseLong(newAsciiString(strValue)), is(value));
}

@ParameterizedTest
@ValueSource(strings = { "-0", "+0", "+1", "+10", "000" })
public void parseLongSigned(final String value) {
assertThat("Unexpected result for String representation",
CharSequences.parseLong(value), is(Long.parseLong(value)));
assertThat("Unexpected result for AsciiBuffer representation",
CharSequences.parseLong(newAsciiString(value)), is(Long.parseLong(value)));
}

@ParameterizedTest
@ValueSource(strings = { "", "-", "+", "a", "0+", "0-", "--0", "++0", "0a0" })
public void parseLongFailure(final String value) {
assertThrows(NumberFormatException.class, () -> CharSequences.parseLong(value),
"Unexpected result for String representation");
assertThrows(NumberFormatException.class, () -> CharSequences.parseLong(newAsciiString(value)),
"Unexpected result for AsciiBuffer representation");
}

@Test
public void parseLongFromSubSequence() {
String value = "text42text";
assertThat("Unexpected result for String representation",
CharSequences.parseLong(value.subSequence(4, 6)), is(42L));
assertThat("Unexpected result for AsciiBuffer representation",
CharSequences.parseLong(newAsciiString(value).subSequence(4, 6)), is(42L));
}

@Test
@Disabled("ReadOnlyByteBuffer#slice() does not account for the slice offset")
public void parseLongFromSlice() {
Buffer buffer = DEFAULT_RO_ALLOCATOR.fromAscii("text42text");
assertThat("Unexpected result for AsciiBuffer representation",
CharSequences.parseLong(newAsciiString(buffer.slice(4, 2))), is(42L));
}
}
Loading

0 comments on commit fc33ee5

Please sign in to comment.