Skip to content

Commit

Permalink
math/rand/v2: add ChaCha8
Browse files Browse the repository at this point in the history
ChaCha8 provides a cryptographically strong generator
alongside PCG, so that people who want stronger randomness
have access to that. On systems with 128-bit vector math
assembly (amd64 and arm64), ChaCha8 runs at about the same
speed as PCG (25% slower on amd64, 2% faster on arm64).

Apart from the new ChaCha8 benchmark, all of the variation
reported in the benchmarks below is noise.

goos: linux
goarch: amd64
pkg: math/rand/v2
cpu: AMD Ryzen 9 7950X 16-Core Processor
                        │ afa459a2f0.amd64 │          bbb48afeb7.amd64           │
                        │      sec/op      │    sec/op     vs base               │
PCG_DXSM-32                    1.488n ± 2%    1.492n ± 2%       ~ (p=0.309 n=20)
ChaCha8-32                                    1.861n ± 2%
SourceUint64-32                1.450n ± 3%    1.590n ± 2%  +9.69% (p=0.000 n=20)
GlobalInt64-32                 2.067n ± 2%    2.061n ± 1%       ~ (p=0.952 n=20)
GlobalInt64Parallel-32        0.1044n ± 2%   0.1041n ± 1%       ~ (p=0.498 n=20)
GlobalUint64-32                2.085n ± 0%    2.256n ± 2%  +8.23% (p=0.000 n=20)
GlobalUint64Parallel-32       0.1008n ± 1%   0.1018n ± 1%       ~ (p=0.041 n=20)
Int64-32                       1.779n ± 1%    1.779n ± 1%       ~ (p=0.410 n=20)
Uint64-32                      1.854n ± 2%    1.882n ± 1%       ~ (p=0.044 n=20)
GlobalIntN1000-32              3.140n ± 3%    3.115n ± 3%       ~ (p=0.673 n=20)
IntN1000-32                    2.496n ± 1%    2.509n ± 1%       ~ (p=0.171 n=20)
Int64N1000-32                  2.510n ± 2%    2.493n ± 1%       ~ (p=0.804 n=20)
Int64N1e8-32                   2.471n ± 2%    2.521n ± 1%  +1.98% (p=0.003 n=20)
Int64N1e9-32                   2.488n ± 2%    2.506n ± 1%       ~ (p=0.663 n=20)
Int64N2e9-32                   2.478n ± 2%    2.482n ± 2%       ~ (p=0.533 n=20)
Int64N1e18-32                  3.088n ± 1%    3.216n ± 1%  +4.15% (p=0.000 n=20)
Int64N2e18-32                  3.493n ± 1%    3.635n ± 2%  +4.05% (p=0.000 n=20)
Int64N4e18-32                  5.060n ± 2%    5.122n ± 1%  +1.22% (p=0.000 n=20)
Int32N1000-32                  2.620n ± 1%    2.672n ± 1%  +2.00% (p=0.002 n=20)
Int32N1e8-32                   2.652n ± 0%    2.646n ± 1%       ~ (p=0.743 n=20)
Int32N1e9-32                   2.644n ± 1%    2.660n ± 2%       ~ (p=0.163 n=20)
Int32N2e9-32                   2.619n ± 2%    2.652n ± 1%       ~ (p=0.132 n=20)
Float32-32                     2.261n ± 1%    2.267n ± 1%       ~ (p=0.516 n=20)
Float64-32                     2.241n ± 2%    2.276n ± 1%       ~ (p=0.080 n=20)
ExpFloat64-32                  3.716n ± 1%    3.779n ± 1%  +1.68% (p=0.007 n=20)
NormFloat64-32                 3.718n ± 1%    3.747n ± 1%       ~ (p=0.011 n=20)
Perm3-32                       34.11n ± 2%    34.23n ± 2%       ~ (p=0.779 n=20)
Perm30-32                      200.6n ± 0%    202.3n ± 2%       ~ (p=0.055 n=20)
Perm30ViaShuffle-32            109.7n ± 1%    115.5n ± 2%  +5.34% (p=0.000 n=20)
ShuffleOverhead-32             107.2n ± 1%    113.3n ± 1%  +5.74% (p=0.000 n=20)
Concurrent-32                  2.108n ± 6%    2.107n ± 1%       ~ (p=0.448 n=20)

goos: darwin
goarch: arm64
pkg: math/rand/v2
cpu: Apple M1
                       │ afa459a2f0.arm64 │          bbb48afeb7.arm64           │
                       │      sec/op      │    sec/op     vs base               │
PCG_DXSM-8                    2.531n ± 0%    2.529n ± 0%       ~ (p=0.586 n=20)
ChaCha8-8                                    2.480n ± 0%
SourceUint64-8                2.531n ± 0%    2.534n ± 0%       ~ (p=0.227 n=20)
GlobalInt64-8                 2.177n ± 1%    2.173n ± 1%       ~ (p=0.733 n=20)
GlobalInt64Parallel-8        0.4319n ± 0%   0.4304n ± 0%  -0.32% (p=0.003 n=20)
GlobalUint64-8                2.185n ± 1%    2.185n ± 0%       ~ (p=0.541 n=20)
GlobalUint64Parallel-8       0.4295n ± 1%   0.4294n ± 0%       ~ (p=0.203 n=20)
Int64-8                       4.104n ± 0%    4.107n ± 0%       ~ (p=0.193 n=20)
Uint64-8                      4.080n ± 0%    4.081n ± 0%       ~ (p=0.053 n=20)
GlobalIntN1000-8              2.814n ± 1%    2.814n ± 0%       ~ (p=0.879 n=20)
IntN1000-8                    4.140n ± 0%    4.141n ± 0%       ~ (p=0.428 n=20)
Int64N1000-8                  4.139n ± 0%    4.140n ± 0%       ~ (p=0.114 n=20)
Int64N1e8-8                   4.140n ± 0%    4.140n ± 0%       ~ (p=0.898 n=20)
Int64N1e9-8                   4.139n ± 0%    4.140n ± 0%       ~ (p=0.593 n=20)
Int64N2e9-8                   4.140n ± 0%    4.139n ± 0%       ~ (p=0.158 n=20)
Int64N1e18-8                  5.273n ± 0%    5.274n ± 0%       ~ (p=0.308 n=20)
Int64N2e18-8                  6.059n ± 0%    6.058n ± 0%       ~ (p=0.053 n=20)
Int64N4e18-8                  8.803n ± 0%    8.800n ± 0%       ~ (p=0.673 n=20)
Int32N1000-8                  4.131n ± 0%    4.131n ± 0%       ~ (p=0.342 n=20)
Int32N1e8-8                   4.131n ± 0%    4.131n ± 0%       ~ (p=0.091 n=20)
Int32N1e9-8                   4.131n ± 0%    4.131n ± 0%       ~ (p=0.273 n=20)
Int32N2e9-8                   4.131n ± 0%    4.131n ± 0%       ~ (p=0.425 n=20)
Float32-8                     4.110n ± 0%    4.112n ± 0%       ~ (p=0.203 n=20)
Float64-8                     4.104n ± 0%    4.106n ± 0%       ~ (p=0.409 n=20)
ExpFloat64-8                  5.338n ± 0%    5.339n ± 0%       ~ (p=0.037 n=20)
NormFloat64-8                 5.731n ± 0%    5.733n ± 0%       ~ (p=0.692 n=20)
Perm3-8                       26.62n ± 0%    26.65n ± 0%  +0.09% (p=0.000 n=20)
Perm30-8                      194.6n ± 2%    194.9n ± 0%       ~ (p=0.141 n=20)
Perm30ViaShuffle-8            156.4n ± 0%    156.5n ± 0%  +0.06% (p=0.000 n=20)
ShuffleOverhead-8             125.8n ± 0%    125.0n ± 0%  -0.64% (p=0.000 n=20)
Concurrent-8                  2.654n ± 6%    2.441n ± 6%  -8.06% (p=0.009 n=20)

goos: linux
goarch: 386
pkg: math/rand/v2
cpu: AMD Ryzen 9 7950X 16-Core Processor
                        │ afa459a2f0.386 │            bbb48afeb7.386            │
                        │     sec/op     │    sec/op      vs base               │
PCG_DXSM-32                  7.793n ± 2%    7.647n ±  1%       ~ (p=0.021 n=20)
ChaCha8-32                                  11.48n ±  2%
SourceUint64-32              7.680n ± 1%    7.714n ±  1%       ~ (p=0.713 n=20)
GlobalInt64-32               3.474n ± 3%    3.491n ± 28%       ~ (p=0.337 n=20)
GlobalInt64Parallel-32      0.3253n ± 0%   0.3194n ±  0%  -1.81% (p=0.000 n=20)
GlobalUint64-32              3.433n ± 2%    3.610n ±  2%  +5.14% (p=0.000 n=20)
GlobalUint64Parallel-32     0.3156n ± 0%   0.3164n ±  0%       ~ (p=0.073 n=20)
Int64-32                     7.707n ± 1%    7.824n ±  0%  +1.52% (p=0.005 n=20)
Uint64-32                    7.714n ± 1%    7.732n ±  2%       ~ (p=0.441 n=20)
GlobalIntN1000-32            6.236n ± 1%    6.176n ±  2%       ~ (p=0.499 n=20)
IntN1000-32                  10.41n ± 1%    10.31n ±  2%       ~ (p=0.782 n=20)
Int64N1000-32                10.97n ± 2%    11.22n ±  2%  +2.19% (p=0.002 n=20)
Int64N1e8-32                 10.98n ± 1%    11.07n ±  1%       ~ (p=0.056 n=20)
Int64N1e9-32                 10.95n ± 0%    11.15n ±  2%       ~ (p=0.016 n=20)
Int64N2e9-32                 11.11n ± 1%    11.00n ±  1%       ~ (p=0.654 n=20)
Int64N1e18-32                15.18n ± 2%    14.97n ±  2%       ~ (p=0.387 n=20)
Int64N2e18-32                15.61n ± 1%    15.91n ±  1%  +1.92% (p=0.003 n=20)
Int64N4e18-32                19.23n ± 2%    18.98n ±  1%       ~ (p=1.000 n=20)
Int32N1000-32                10.35n ± 1%    10.31n ±  2%       ~ (p=0.081 n=20)
Int32N1e8-32                 10.33n ± 1%    10.38n ±  1%       ~ (p=0.335 n=20)
Int32N1e9-32                 10.35n ± 1%    10.37n ±  1%       ~ (p=0.497 n=20)
Int32N2e9-32                 10.35n ± 1%    10.41n ±  1%       ~ (p=0.605 n=20)
Float32-32                   13.57n ± 1%    13.78n ±  2%       ~ (p=0.047 n=20)
Float64-32                   22.95n ± 4%    23.43n ±  3%       ~ (p=0.218 n=20)
ExpFloat64-32                15.23n ± 2%    15.46n ±  1%       ~ (p=0.095 n=20)
NormFloat64-32               13.78n ± 1%    13.73n ±  2%       ~ (p=0.031 n=20)
Perm3-32                     46.62n ± 2%    47.46n ±  2%  +1.82% (p=0.004 n=20)
Perm30-32                    400.7n ± 1%    403.5n ±  1%       ~ (p=0.098 n=20)
Perm30ViaShuffle-32          350.5n ± 1%    348.1n ±  2%       ~ (p=0.703 n=20)
ShuffleOverhead-32           326.0n ± 2%    326.2n ±  2%       ~ (p=0.440 n=20)
Concurrent-32                3.290n ± 0%    3.297n ±  4%       ~ (p=0.189 n=20)

For #61716.

Change-Id: Id2a7e1c1db0beb81f563faaefba65fe292497269
Reviewed-on: https://go-review.googlesource.com/c/go/+/516859
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
  • Loading branch information
rsc committed Nov 19, 2023
1 parent 06145fe commit 6382893
Show file tree
Hide file tree
Showing 11 changed files with 1,479 additions and 3 deletions.
6 changes: 6 additions & 0 deletions api/next/61716.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pkg math/rand/v2, func Int64N(int64) int64 #61716
pkg math/rand/v2, func IntN(int) int #61716
pkg math/rand/v2, func N[$0 intType]($0) $0 #61716
pkg math/rand/v2, func New(Source) *Rand #61716
pkg math/rand/v2, func NewChaCha8([32]uint8) *ChaCha8 #61716
pkg math/rand/v2, func NewPCG(uint64, uint64) *PCG #61716
pkg math/rand/v2, func NewZipf(*Rand, float64, float64, uint64) *Zipf #61716
pkg math/rand/v2, func NormFloat64() float64 #61716
Expand All @@ -19,6 +20,10 @@ pkg math/rand/v2, func Uint32N(uint32) uint32 #61716
pkg math/rand/v2, func Uint64() uint64 #61716
pkg math/rand/v2, func Uint64N(uint64) uint64 #61716
pkg math/rand/v2, func UintN(uint) uint #61716
pkg math/rand/v2, method (*ChaCha8) MarshalBinary() ([]uint8, error) #61716
pkg math/rand/v2, method (*ChaCha8) Seed([32]uint8) #61716
pkg math/rand/v2, method (*ChaCha8) Uint64() uint64 #61716
pkg math/rand/v2, method (*ChaCha8) UnmarshalBinary([]uint8) error #61716
pkg math/rand/v2, method (*PCG) MarshalBinary() ([]uint8, error) #61716
pkg math/rand/v2, method (*PCG) Seed(uint64, uint64) #61716
pkg math/rand/v2, method (*PCG) Uint64() uint64 #61716
Expand All @@ -41,6 +46,7 @@ pkg math/rand/v2, method (*Rand) Uint64() uint64 #61716
pkg math/rand/v2, method (*Rand) Uint64N(uint64) uint64 #61716
pkg math/rand/v2, method (*Rand) UintN(uint) uint #61716
pkg math/rand/v2, method (*Zipf) Uint64() uint64 #61716
pkg math/rand/v2, type ChaCha8 struct #61716
pkg math/rand/v2, type PCG struct #61716
pkg math/rand/v2, type Rand struct #61716
pkg math/rand/v2, type Source interface { Uint64 } #61716
Expand Down
13 changes: 10 additions & 3 deletions src/go/build/deps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,17 @@ var depsRules = `
unsafe < maps;
unsafe < internal/chacha8rand;
# RUNTIME is the core runtime group of packages, all of them very light-weight.
internal/abi, internal/cpu, internal/goarch,
internal/coverage/rtcov, internal/godebugs, internal/goexperiment,
internal/goos, unsafe
internal/abi,
internal/chacha8rand,
internal/coverage/rtcov,
internal/cpu,
internal/goarch,
internal/godebugs,
internal/goexperiment,
internal/goos
< internal/bytealg
< internal/itoa
< internal/unsafeheader
Expand Down
175 changes: 175 additions & 0 deletions src/internal/chacha8rand/chacha8.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package chacha8rand implements a pseudorandom generator
// based on ChaCha8. It is used by both runtime and math/rand/v2
// and must have no dependencies.
package chacha8rand

import "unsafe"

// Parameters of the generator. Refill, Marshal and Unmarshal in this file
// all rely on these values agreeing with each other.
// NOTE(review): the block implementation lives outside this file and
// presumably bakes in the same numbers — confirm before changing any of them.
const (
// ctrInc is added to the block counter by every Refill.
ctrInc = 4 // increment counter by 4 between block calls
// ctrMax is the counter value at which Refill installs a new seed
// and wraps the counter back to 0.
ctrMax = 16 // reseed when counter reaches 16
// chunk is the number of uint64 values in the buffer filled by block.
chunk = 32 // each chunk produced by block is 32 uint64s
// reseed is the number of trailing buffer words that Refill copies into
// the seed; those words are withheld from Next in the last block
// before a reseed.
reseed = 4 // reseed with 4 words
)

// block is the chacha8rand block function.
// It is declared here without a body; the implementation is provided
// outside this file (presumably per-architecture assembly or a generic
// Go fallback — confirm).
// Callers in this file pass the current seed, the buffer to be filled
// with the next chunk of uint64 values, and the current block counter.
func block(seed *[4]uint64, blocks *[32]uint64, counter uint32)

// A State holds the state for a single random generator.
// It must be used from one goroutine at a time.
// If used by multiple goroutines at a time, the goroutines
// may see the same random values, but the code will not
// crash or cause out-of-bounds memory accesses.
type State struct {
// buf holds the values written by the most recent call to block.
buf [32]uint64
// seed is the key passed to block; Refill replaces it with the
// last reseed words of buf each time the counter reaches ctrMax.
seed [4]uint64
// i is the index in buf of the next value Next will return.
i uint32
// n is the number of values in buf that Next may hand out:
// chunk normally, chunk-reseed for the last block before a reseed.
n uint32
// c is the block counter passed to block; Refill advances it by ctrInc.
c uint32
}

// Next hands out the next buffered random value.
// The boolean result is false when the buffer is used up;
// the caller should then call Refill and call Next again.
//
// Next is //go:nosplit to allow its use in the runtime
// with per-m data without holding the per-m lock.
//
//go:nosplit
func (s *State) Next() (uint64, bool) {
	pos := s.i
	if pos < s.n {
		s.i = pos + 1
		// Masking with 31 keeps the index provably inside buf,
		// which eliminates the bounds check.
		return s.buf[pos&31], true
	}
	return 0, false
}

// Init seeds the State with the given seed value.
//
// The 32 seed bytes are decoded as four little-endian 64-bit words,
// the same byte order Marshal uses when it writes the seed out.
// Decoding explicitly (instead of reinterpreting the byte array through
// an unsafe pointer) makes a given seed produce the same key on
// big-endian and little-endian machines, and avoids reading a
// byte-aligned array through a *[4]uint64.
func (s *State) Init(seed [32]byte) {
	// key has one uint64 per 8 seed bytes, i.e. it is a [4]uint64.
	// Deriving the length with unsafe.Sizeof also keeps this file's
	// unsafe import in use.
	var key [unsafe.Sizeof(seed) / unsafe.Sizeof(uint64(0))]uint64
	for i := range key {
		b := seed[8*i : 8*i+8]
		key[i] = uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
			uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
	}
	s.Init64(key)
}

// Init64 seeds the state with the given seed value.
// It stores the seed, generates the first chunk of output with
// block counter 0, and makes the whole chunk available to Next.
func (s *State) Init64(seed [4]uint64) {
	const firstCounter = 0

	s.seed = seed
	block(&s.seed, &s.buf, firstCounter)
	s.c, s.i, s.n = firstCounter, 0, chunk
}

// Refill refills the state with more random values.
// After a call to Refill, an immediate call to Next will succeed
// (unless multiple goroutines are incorrectly sharing a state).
func (s *State) Refill() {
	next := s.c + ctrInc
	if next == ctrMax {
		// Install the last reseed words of the current buffer as the new
		// seed, for forward secrecy. This is normally done right after
		// a block is computed; doing it right before the next block is
		// computed instead lets the serialized state stay small (just
		// the seed plus an offset). The price is that the benefit is
		// delayed (a memory dump still allows reconstructing the recent
		// past), which is deemed acceptable for the reduced size.
		copy(s.seed[:], s.buf[len(s.buf)-reseed:])
		next = 0
	}
	s.c = next
	block(&s.seed, &s.buf, next)

	// In the last block before a reseed, the trailing reseed words are
	// held back so they can become the next seed.
	avail := uint32(len(s.buf))
	if next == ctrMax-ctrInc {
		avail -= reseed
	}
	s.i = 0
	s.n = avail
}

// Marshal marshals the state into a byte slice.
// Marshal and Unmarshal are functions, not methods,
// so that they will not be linked into the runtime
// when it uses the State struct, since the runtime
// does not need these.
//
// The encoding is 48 bytes: the 8-byte tag "chacha8:", the number of
// values consumed since the current seed was installed as a big-endian
// uint64, and the four seed words as little-endian uint64s.
func Marshal(s *State) []byte {
	const word = 8

	out := make([]byte, 6*word)
	copy(out[:word], "chacha8:")

	// Whole chunks already consumed under this seed, plus the
	// position inside the current chunk.
	consumed := uint64(s.c/ctrInc*chunk + s.i)
	bePutUint64(out[word:], consumed)

	key := out[2*word:]
	for k := range s.seed {
		lePutUint64(key[k*word:], s.seed[k])
	}
	return out
}

// errUnmarshalChaCha8 is the error returned by Unmarshal
// when the input is not a valid encoding.
type errUnmarshalChaCha8 struct{}

// Error implements the error interface.
func (e *errUnmarshalChaCha8) Error() string {
	const msg = "invalid ChaCha8 encoding"
	return msg
}

// Unmarshal unmarshals the state from a byte slice.
// It accepts exactly the 48-byte encoding written by Marshal and returns
// an error, leaving s untouched, if the length or the "chacha8:" tag is
// wrong or the recorded offset is larger than a seed can ever reach.
func Unmarshal(s *State, data []byte) error {
	const (
		word = 8
		// maxUsed is the largest offset reachable before a reseed:
		// every chunk under one seed, minus the held-back reseed words.
		maxUsed = (ctrMax/ctrInc)*chunk - reseed
	)

	if len(data) != 6*word {
		return new(errUnmarshalChaCha8)
	}
	if string(data[:word]) != "chacha8:" {
		return new(errUnmarshalChaCha8)
	}
	consumed := beUint64(data[word:])
	if consumed > maxUsed {
		return new(errUnmarshalChaCha8)
	}

	key := data[2*word:]
	for k := range s.seed {
		s.seed[k] = leUint64(key[k*word:])
	}

	// Regenerate the chunk the offset falls in and resume inside it.
	pos := uint32(consumed)
	ctr := ctrInc * (pos / chunk)
	s.c = ctr
	block(&s.seed, &s.buf, ctr)
	s.i = pos % chunk

	avail := uint32(chunk)
	if ctr == ctrMax-ctrInc {
		avail -= reseed
	}
	s.n = avail
	return nil
}

// beUint64 decodes the first 8 bytes of b as a big-endian uint64.
// It is a copy of binary.BigEndian.Uint64, kept here to avoid a dependency.
func beUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint64
	for _, c := range b[:8] {
		v = v<<8 | uint64(c)
	}
	return v
}

// bePutUint64 stores v into the first 8 bytes of b in big-endian order.
// It is a copy of binary.BigEndian.PutUint64, kept here to avoid a dependency.
func bePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	// Fill from the last byte backwards, peeling off the low byte each time.
	for i := 7; i >= 0; i-- {
		b[i] = byte(v)
		v >>= 8
	}
}

// leUint64 decodes the first 8 bytes of b as a little-endian uint64.
// It is a copy of binary.LittleEndian.Uint64, kept here to avoid a dependency.
func leUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint64
	// Start at the most significant byte (the last one) and shift down.
	for i := 7; i >= 0; i-- {
		v = v<<8 | uint64(b[i])
	}
	return v
}

// lePutUint64 stores v into the first 8 bytes of b in little-endian order.
// It is a copy of binary.LittleEndian.PutUint64, kept here to avoid a dependency.
func lePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	// Emit the low byte first, then shift the next one into place.
	for i := 0; i < 8; i++ {
		b[i] = byte(v)
		v >>= 8
	}
}
Loading

0 comments on commit 6382893

Please sign in to comment.