diff --git a/ydb/library/yql/dq/runtime/dq_output_consumer.cpp b/ydb/library/yql/dq/runtime/dq_output_consumer.cpp
index 4def8b5924ee..63787ee83003 100644
--- a/ydb/library/yql/dq/runtime/dq_output_consumer.cpp
+++ b/ydb/library/yql/dq/runtime/dq_output_consumer.cpp
@@ -20,6 +20,11 @@ using namespace NKikimr;
 using namespace NMiniKQL;
 using namespace NUdf;
 
+inline ui64 SpreadHash(ui64 hash) { // Fibonacci hashing step: returns the upper 64 bits of hash * 11400714819323198485.
+    // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+    return ((unsigned __int128)hash * 11400714819323198485llu) >> 64; // 128-bit product so the high half survives; multiplier is about 2^64 / golden ratio; unsigned __int128 is a GCC/Clang extension.
+}
+
 class TDqOutputMultiConsumer : public IDqOutputConsumer {
 public:
 
@@ -189,6 +194,9 @@ class TDqOutputHashPartitionConsumer : public IDqOutputConsumer {
             hash = CombineHashes(hash, HashColumn(keyId, columnValue));
         }
 
+
+        hash = SpreadHash(hash); // pass the combined key hash through SpreadHash before the modulo below
+
+
         return hash % Outputs.size();
     }
 
@@ -200,6 +208,8 @@ class TDqOutputHashPartitionConsumer : public IDqOutputConsumer {
             hash = CombineHashes(hash, HashColumn(keyId, values[KeyColumns[keyId].Index]));
         }
 
+        hash = SpreadHash(hash); // pass the combined key hash through SpreadHash before the modulo below
+
+
         return hash % Outputs.size();
     }
 
@@ -303,6 +313,8 @@ class TDqOutputHashPartitionConsumerScalar : public IDqOutputConsumer {
             hash = CombineHashes(hash, HashColumn(keyId, values[KeyColumns_[keyId].Index]));
         }
 
+        hash = SpreadHash(hash); // pass the combined key hash through SpreadHash before the modulo below
+
+
         return hash % Outputs_.size();
     }
 
@@ -500,6 +512,9 @@ class TDqOutputHashPartitionConsumerBlock : public IDqOutputConsumer {
             }
             hash = CombineHashes(hash, keyHash);
         }
+
+        hash = SpreadHash(hash); // pass the combined key hash through SpreadHash before the modulo below
+
+
         return hash % Outputs_.size();
     }
 