Skip to content

Commit

Permalink
Merge bitcoin#24041: util: Restore GetIntArg saturating behavior
Browse files Browse the repository at this point in the history
b5c9bb5 util: Restore GetIntArg saturating behavior (James O'Beirne)

Pull request description:

  The new locale-independent atoi64 method introduced in bitcoin#20452 parses large integer values higher than maximum representable value as 0 instead of the maximum value, which breaks backwards compatibility. This commit restores compatibility and adds test coverage for this case in terms of the related GetIntArg and strtoll functions.

  Specifically, command line or bitcoin.conf integer values greater than `9223372036854775807` (`2**63-1`) used to be parsed as `9223372036854775807` before bitcoin#20452. Then bitcoin#20452 caused them to be parsed as `0`. And after this PR they will be parsed as `9223372036854775807` again.

  This change is a stripped-down alternative version of bitcoin#23841 by jamesob

ACKs for top commit:
  jamesob:
    Github ACK bitcoin@b5c9bb5
  vincenzopalazzo:
    ACK bitcoin@b5c9bb5
  MarcoFalke:
    review ACK b5c9bb5 🌘

Tree-SHA512: 4e8abdbabf3cf4713cf5a7c5169539159f359ab4109a4e7e644cc2e9b2b0c3c532fad9f6b772daf015e1c5340ce59280cd9a41f2730afda6099cbf636b7d23ae
  • Loading branch information
MarcoFalke authored and vijaydasmp committed Jan 17, 2025
1 parent 4c166e1 commit fb722ff
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 25 deletions.
10 changes: 2 additions & 8 deletions src/test/fuzz/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,20 +280,14 @@ FUZZ_TARGET(string)
}

{
const int atoi_result = atoi(random_string_1.c_str());
const int locale_independent_atoi_result = LocaleIndependentAtoi<int>(random_string_1);
const int64_t atoi64_result = atoi64_legacy(random_string_1);
const bool out_of_range = atoi64_result < std::numeric_limits<int>::min() || atoi64_result > std::numeric_limits<int>::max();
if (out_of_range) {
assert(locale_independent_atoi_result == 0);
} else {
assert(atoi_result == locale_independent_atoi_result);
}
assert(locale_independent_atoi_result == std::clamp<int64_t>(atoi64_result, std::numeric_limits<int>::min(), std::numeric_limits<int>::max()));
}

{
const int64_t atoi64_result = atoi64_legacy(random_string_1);
const int64_t locale_independent_atoi_result = LocaleIndependentAtoi<int64_t>(random_string_1);
assert(atoi64_result == locale_independent_atoi_result || locale_independent_atoi_result == 0);
assert(atoi64_result == locale_independent_atoi_result);
}
}
6 changes: 6 additions & 0 deletions src/test/getarg_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <util/strencodings.h>
#include <util/system.h>

#include <limits>
#include <string>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -144,6 +145,11 @@ BOOST_AUTO_TEST_CASE(intarg)
BOOST_CHECK_EQUAL(m_local_args.GetArg("-foo", 11), 0);
BOOST_CHECK_EQUAL(m_local_args.GetArg("-bar", 11), 0);

// Check under-/overflow behavior.
ResetArgs("-foo=-9223372036854775809 -bar=9223372036854775808");
BOOST_CHECK_EQUAL(m_local_args.GetArg("-foo", 0), std::numeric_limits<int64_t>::min());
BOOST_CHECK_EQUAL(m_local_args.GetArg("-bar", 0), std::numeric_limits<int64_t>::max());

ResetArgs("-foo=11 -bar=12");
BOOST_CHECK_EQUAL(m_local_args.GetArg("-foo", 0), 11);
BOOST_CHECK_EQUAL(m_local_args.GetArg("-bar", 11), 12);
Expand Down
55 changes: 41 additions & 14 deletions src/test/util_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include <deque>
#include <optional>
#include <random>
#include <limits>
#include <map>
#include <stdint.h>
#include <string.h>
#include <thread>
Expand Down Expand Up @@ -1737,6 +1739,11 @@ BOOST_AUTO_TEST_CASE(test_ToIntegral)
BOOST_CHECK(!ToIntegral<uint8_t>("256"));
}

int64_t atoi64_legacy(const std::string& str)
{
return strtoll(str.c_str(), nullptr, 10);
}

BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi)
{
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1234"), 1'234);
Expand Down Expand Up @@ -1764,48 +1771,68 @@ BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi)
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(""), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("aap"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0x1"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-32482348723847471234"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("32482348723847471234"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-32482348723847471234"), -2'147'483'647 - 1);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("32482348723847471234"), 2'147'483'647);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775809"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775809"), -9'223'372'036'854'775'807LL - 1LL);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775808"), -9'223'372'036'854'775'807LL - 1LL);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775807"), 9'223'372'036'854'775'807);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775808"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775808"), 9'223'372'036'854'775'807);

std::map<std::string, int64_t> atoi64_test_pairs = {
{"-9223372036854775809", std::numeric_limits<int64_t>::min()},
{"-9223372036854775808", -9'223'372'036'854'775'807LL - 1LL},
{"9223372036854775807", 9'223'372'036'854'775'807},
{"9223372036854775808", std::numeric_limits<int64_t>::max()},
{"+-", 0},
{"0x1", 0},
{"ox1", 0},
{"", 0},
};

for (const auto& pair : atoi64_test_pairs) {
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>(pair.first), pair.second);
}

// Ensure legacy compatibility with previous versions of Bitcoin Core's atoi64
for (const auto& pair : atoi64_test_pairs) {
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>(pair.first), atoi64_legacy(pair.first));
}

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("-1"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("0"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551615"), 18'446'744'073'709'551'615ULL);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551616"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551616"), 18'446'744'073'709'551'615ULL);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483649"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483649"), -2'147'483'648LL);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483648"), -2'147'483'648LL);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483647"), 2'147'483'647);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483648"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483648"), 2'147'483'647);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("-1"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("0"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967295"), 4'294'967'295U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967296"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967296"), 4'294'967'295U);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32769"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32769"), -32'768);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32768"), -32'768);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32767"), 32'767);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32768"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32768"), 32'767);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("-1"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("0"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65535"), 65'535U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65536"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65536"), 65'535U);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-129"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-129"), -128);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-128"), -128);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("127"), 127);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("128"), 0);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("128"), 127);

BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("-1"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("0"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("255"), 255U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("256"), 0U);
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("256"), 255U);
}

BOOST_AUTO_TEST_CASE(test_ParseInt64)
Expand Down
19 changes: 16 additions & 3 deletions src/util/strencodings.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <charconv>
#include <cstdint>
#include <optional>
#include <limits>
#include <string>
#include <vector>

Expand Down Expand Up @@ -91,8 +92,12 @@ void SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
// New code should use ToIntegral or the ParseInt* functions
// which provide parse error feedback.
//
// The goal of LocaleIndependentAtoi is to replicate the exact defined behaviour
// of atoi and atoi64 as they behave under the "C" locale.
// The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
// std::atoi as it behaves under the "C" locale, and remove some undefined
// behavior. If the parsed value is bigger than the integer type's maximum
// value, or smaller than the integer type's minimum value, std::atoi has
// undefined behavior, while this function returns the maximum or minimum
// values, respectively.
template <typename T>
T LocaleIndependentAtoi(std::string_view str)
{
Expand All @@ -107,7 +112,15 @@ T LocaleIndependentAtoi(std::string_view str)
s = s.substr(1);
}
auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
if (error_condition != std::errc{}) {
if (error_condition == std::errc::result_out_of_range) {
if (s.length() >= 1 && s[0] == '-') {
// Saturate underflow, per strtoll's behavior.
return std::numeric_limits<T>::min();
} else {
// Saturate overflow, per strtoll's behavior.
return std::numeric_limits<T>::max();
}
} else if (error_condition != std::errc{}) {
return 0;
}
return result;
Expand Down

0 comments on commit fb722ff

Please sign in to comment.