Skip to content

Commit

Permalink
make stdev calculation more numerically stable (#3984)
Browse files Browse the repository at this point in the history
  • Loading branch information
vporyadke authored Apr 22, 2024
1 parent 36fc0ee commit b1eb036
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 8 deletions.
30 changes: 22 additions & 8 deletions ydb/core/mind/hive/hive.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <bitset>
#include <ranges>

#include <util/generic/queue.h>
#include <util/random/random.h>
Expand Down Expand Up @@ -201,20 +202,33 @@ TResourceNormalizedValues NormalizeRawValues(const TResourceRawValues& values, c
NMetrics::EResource GetDominantResourceType(const TResourceRawValues& values, const TResourceRawValues& maximum);
NMetrics::EResource GetDominantResourceType(const TResourceNormalizedValues& normValues);

// Compensated (Kahan) summation of every element of `values`:
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
//
// Alongside the running total, a second accumulator tracks the part of each
// addition that did not fit into the total, and feeds it back into the next
// term. Both accumulators start value-initialized, so an empty range yields a
// value-initialized element.
//
// The element type only needs to support binary `+` and `-` with itself.
template<std::ranges::range TRange>
std::ranges::range_value_t<TRange> StableSum(const TRange& values) {
    using TElement = std::ranges::range_value_t<TRange>;

    TElement total{};
    TElement lostPart{};  // what previous additions failed to carry into `total`

    for (const auto& item : values) {
        // Re-inject the previously lost part before adding the new term.
        const TElement adjusted = item - lostPart;
        const TElement next = total + adjusted;
        // (next - total) is what was actually added; subtracting what we
        // meant to add leaves the error of this step. The parenthesization
        // is essential and must not be algebraically simplified.
        lostPart = (next - total) - adjusted;
        total = next;
    }

    return total;
}

template <typename... ResourceTypes>
inline std::tuple<ResourceTypes...> GetStDev(const TVector<std::tuple<ResourceTypes...>>& values) {
std::tuple<ResourceTypes...> sum;
if (values.empty())
return sum;
for (const auto& v : values) {
sum = sum + v;
}
sum = StableSum(values);
auto mean = sum / values.size();
sum = std::tuple<ResourceTypes...>();
for (const auto& v : values) {
auto diff = v - mean;
sum = sum + diff * diff;
}
auto quadraticDev = [&] (const std::tuple<ResourceTypes...>& value) {
auto diff = value - mean;
return diff * diff;
};
sum = StableSum(values | std::views::transform(quadraticDev));
auto div = sum / values.size();
auto st_dev = sqrt(div);
return tuple_cast<ResourceTypes...>::cast(st_dev);
Expand Down
18 changes: 18 additions & 0 deletions ydb/core/mind/hive/hive_impl_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,22 @@ Y_UNIT_TEST_SUITE(THiveImplTest) {
Ctest << "HIVE_TABLET_BALANCE_STRATEGY_RANDOM" << Endl;
CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>, EResourceToBalance::Memory);
}

// Checks GetStDev on a nearly-constant sample of small values, where naive
// summation is prone to rounding error, and verifies that the result does
// not depend on the position of the single outlier.
Y_UNIT_TEST(TestStDev) {
    using TSingleResource = std::tuple<double>;

    // 99 samples of 50e-6 and a single sample of 51e-6.
    TVector<TSingleResource> values(100, 50.0 / 1'000'000);
    values.front() = 51.0 / 1'000'000;

    double stDev1 = std::get<0>(GetStDev(values));

    // Move the outlier from the first to the last position.
    std::swap(values.front(), values.back());

    double stDev2 = std::get<0>(GetStDev(values));

    // In units of 1e-6: mean = 50.01, so the squared deviations are
    // 99 * 0.01^2 + 0.99^2 = 0.0099 + 0.9801 = 0.99, and the population
    // variance is 0.99 / 100 = 0.0099.
    double expectedStDev = sqrt(0.0099) / 1'000'000;

    // The expected value is ~9.95e-8, so the tolerance has to be far below
    // that for the comparison to be able to fail at all.
    UNIT_ASSERT_DOUBLES_EQUAL(expectedStDev, stDev1, 1e-12);
    UNIT_ASSERT_VALUES_EQUAL(stDev1, stDev2);
}
}

0 comments on commit b1eb036

Please sign in to comment.