diff --git a/crypto/merkle/tree.go b/crypto/merkle/tree.go index 466c43482..1148f9a7d 100644 --- a/crypto/merkle/tree.go +++ b/crypto/merkle/tree.go @@ -1,7 +1,10 @@ package merkle import ( + "math" "math/bits" + "runtime" + "sync" ) // HashFromByteSlices computes a Merkle tree where the leaves are the byte slice, @@ -20,6 +23,43 @@ func HashFromByteSlices(items [][]byte) []byte { } } +// HashFromByteSlicesParallel computes a Merkle tree in parallel. +// Since it uses all of the available CPU cores, performance may decrease if it is called from multiple places at the same time. +func HashFromByteSlicesParallel(items [][]byte, depth int) []byte { + switch len(items) { + case 0: + return emptyHash() + case 1: + return leafHash(items[0]) + default: + k := getSplitPoint(int64(len(items))) + var left, right []byte + var wg sync.WaitGroup + + if depth < maxDepthToLimitGoroutines() { + wg.Add(1) + go func() { + defer wg.Done() + left = HashFromByteSlicesParallel(items[:k], depth+1) + }() + right = HashFromByteSlicesParallel(items[k:], depth+1) + wg.Wait() + + } else { + // At or deeper than the max depth, it executes sequentially to bound the number of goroutines. + left = HashFromByteSlicesParallel(items[:k], depth+1) + right = HashFromByteSlicesParallel(items[k:], depth+1) + } + return innerHash(left, right) + } +} + +// The number of goroutines created up to the max depth should not be too large for the number of CPUs. +// max_depth is the smallest integer such that 2^max_depth >= num_cpu*2 +func maxDepthToLimitGoroutines() int { + return int(math.Ceil(math.Log2(float64(runtime.NumCPU() * 2)))) +} + // HashFromByteSliceIterative is an iterative alternative to // HashFromByteSlice motivated by potential performance improvements. 
// (#2611) had suggested that an iterative version of diff --git a/crypto/merkle/tree_bench_test.go b/crypto/merkle/tree_bench_test.go new file mode 100644 index 000000000..ede7e58ff --- /dev/null +++ b/crypto/merkle/tree_bench_test.go @@ -0,0 +1,34 @@ +package merkle + +import ( + "testing" + + "github.com/line/ostracon/crypto/tmhash" + tmrand "github.com/line/ostracon/libs/rand" +) + +func BenchmarkHashFromByteSlices(b *testing.B) { + const total = 4000 + slices := make([][]byte, total) + for j := 0; j < total; j++ { + slices[j] = tmrand.Bytes(tmhash.Size) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + HashFromByteSlices(slices) + } +} + +func BenchmarkHashFromByteSlicesParallel(b *testing.B) { + const total = 4000 + slices := make([][]byte, total) + for j := 0; j < total; j++ { + slices[j] = tmrand.Bytes(tmhash.Size) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + HashFromByteSlicesParallel(slices, 0) + } +} diff --git a/types/tx.go b/types/tx.go index 6a506c6d3..7d9b0d600 100644 --- a/types/tx.go +++ b/types/tx.go @@ -4,6 +4,8 @@ import ( "bytes" "errors" "fmt" + "runtime" + "sync" "github.com/line/ostracon/crypto/merkle" "github.com/line/ostracon/crypto/tmhash" @@ -35,10 +37,23 @@ func (txs Txs) Hash() []byte { // These allocations will be removed once Txs is switched to [][]byte, // ref #2603. This is because golang does not allow type casting slices without unsafe txBzs := make([][]byte, len(txs)) + // The semaphore is used to limit the maximum number of concurrently running goroutines. 
+ sem := make(chan int, runtime.NumCPU()*2) + var wg sync.WaitGroup + wg.Add(len(txs)) for i := 0; i < len(txs); i++ { - txBzs[i] = txs[i].Hash() + sem <- 1 + i := i + go func() { + defer func() { + wg.Done() + <-sem + }() + txBzs[i] = txs[i].Hash() + }() } - return merkle.HashFromByteSlices(txBzs) + wg.Wait() + return merkle.HashFromByteSlicesParallel(txBzs, 0) } // Index returns the index of this transaction in the list, or -1 if not found diff --git a/types/tx_bench_test.go b/types/tx_bench_test.go new file mode 100644 index 000000000..efafeba2d --- /dev/null +++ b/types/tx_bench_test.go @@ -0,0 +1,17 @@ +package types + +import ( + "testing" +) + +func BenchmarkTx_Hash(b *testing.B) { + const ( + total = 4000 + size = 300 + ) + txs := makeTxs(total, size) + b.ResetTimer() + for i := 0; i < b.N; i++ { + txs.Hash() + } +}