Uint - allow compile-time evaluation for all procs #54

mratsim · 2018-06-17T14:50:07Z

This PR allows compile-time evaluation for Uint for all procs (including +, *, div, mod, or, and, xor, ...).
Ints are not covered.

Implementation

What didn't work

Changing Stint backend to C arrays, which I tried yesterday in another branch led to much too many issues, nim-lang/Nim#8052, nim-lang/Nim#8053 until I was stuck with how to slice half of the array to do a simple +=: foo.lo += bar.lo

nim-stint/stint/private/datatypes.nim

Lines 49 to 155 in fec2190

    
           type 
        
             # ### Private ### # 
        
             UintImpl*[N: static[int], T: SomeUnsignedInt] = object 
        
               raw_data*: array[N, T] 
        
             IntImpl*[N: static[int], T: SomeUnsignedInt] = object 
        
               raw_data*: array[N, T] 
        
             # ### Private ### # 
        
             StUint*[bits: static[int]] = object 
        
               data*: stintImpl(bits, UintImpl) 
        
             StInt*[bits: static[int]] = object 
        
               data*: stintImpl(bits, IntImpl) 
        
             AnyImpl*[N: static[int], T: SomeUnsignedInt] = UintImpl[N, T] or IntImpl[N, T] 
        
           # ################################# 
        
           func getSize*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): static[int] = 
        
             ## Get size of int or uint implementation in bits 
        
             N * T.sizeof * 8 
        
           # ###### lo and hi accessors ###### 
        
           # macro used as workaround because template crashes - https://github.com/nim-lang/Nim/issues/8052 
        
           macro loImpl(dst, src: untyped, N: static[int]): untyped = 
        
             assert N >= 2 
        
             result = quote do: 
        
               const halfSize = `N` div 2 
        
               when cpuEndian == littleEndian: 
        
                 for i in 0 ..< halfSize: 
        
                   {.unroll.} 
        
                   `dst`.raw_data[i] = `src`.raw_data[i] 
        
               else: 
        
                 for i in halfSize ..< `N`: 
        
                   {.unroll.} 
        
                   `dst`.raw_data[i] = `src`.raw_data[i] 
        
           macro hiImpl(dst, src: untyped, N: static[int]): untyped = 
        
             assert N >= 2 
        
             result = quote do: 
        
               const halfSize = `N` div 2 
        
               when cpuEndian == littleEndian: 
        
                 for i in halfSize ..< `N`: 
        
                   {.unroll.} 
        
                   `dst`.raw_data[i] = `src`.raw_data[i] 
        
               else: 
        
                 for i in 0 ..< halfSize: 
        
                   {.unroll.} 
        
                   `dst`.raw_data[i] = `src`.raw_data[i] 
        
           proc loProc[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N, T]): AnyImpl[N div 2, T] {.inline.}= 
        
             loImpl(result, x, N) 
        
           proc hiProc[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N, T]): AnyImpl[N div 2, T] {.inline.}= 
        
             hiImpl(result, x, N) 
        
           proc loProc[N: static[int], T: SomeUnsignedInt](x: var AnyImpl[N, T]): var AnyImpl[N div 2, T] {.inline.}= 
        
             loImpl(result, x, N) 
        
           proc hiProc[N: static[int], T: SomeUnsignedInt](x: var AnyImpl[N, T]): var AnyImpl[N div 2, T] {.inline.}= 
        
             hiImpl(result, x, N) 
        
           macro lo*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): untyped= 
        
             ## Get the low part of an unsigned integer 
        
             # TODO: as this is called extensively and it is not a field 
        
             #       we should make sure repeated calls are optimized away by the compiler 
        
             if N == 1: 
        
               result = quote do: `x`.raw_data[0] 
        
             elif N > 1: 
        
               let loProc = bindSym"loProc" 
        
               result = quote do: `loProc`(`x`) 
        
             else: 
        
               result = quote do: {.fatal: "Unreachable".} 
        
           macro `lo=`*[N: static[int], T: SomeUnsignedInt](dst: var AnyImpl[N,T], src: AnyImpl[N div 2, T]): untyped = 
        
             ## Get the mutable part of an unsigned integer 
        
             if N == 1: 
        
               result = quote do: `dst`.raw_data[0] = `src`.raw_data[0] 
        
             elif N > 1: 
        
               result = getAST(loImpl(dst, src, N)) 
        
             else: 
        
               result = quote do: {.fatal: "Unreachable".} 
        
           macro hi*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): untyped = 
        
             ## Get the high part of an unsigned integer 
        
             # TODO: as this is called extensively and it is not a field 
        
             #       we should make sure repeated calls are optimized away by the compiler 
        
             if N == 1: 
        
               result = quote do: `x`.raw_data[0] 
        
             elif N > 1: 
        
               let hiProc = bindSym"hiProc" 
        
               result = quote do: `hiProc`(`x`) 
        
             else: 
        
               result = quote do: {.fatal: "Unreachable".} 
        
           macro `hi=`*[N: static[int], T: SomeUnsignedInt](dst: var AnyImpl[N,T], src: AnyImpl[N div 2, T]): untyped = 
        
             ## Get the high part of an unsigned integer 
        
             if N == 1: 
        
               result = quote do: `dst`.raw_data[0] = `src`.raw_data[0] 
        
             elif N > 1: 
        
               result = getAST(hiImpl(dst, src, N)) 
        
             else: 
        
               result = quote do: {.fatal: "Unreachable".}

What worked

I implemented a macro that gets all the "leaves" of a uint or int in big endian order, so for a uint256 I get [foo.hi.hi, foo.hi.lo, foo.lo.hi, foo.lo.lo].

I replaced all the asWords, asWordsZip, m_asWordsZip macros by a single asWords macro using the new ForLoopStmt which has the benefits of using the clearer for foo, bar in x, y and saving 160 lines of code.

Internally the new asWords just replace the iteration variables by foo.hi.hi, foo.hi.lo, ... in sequence

Reserves

Due to how it's working, the implementation always unrolls loops for GCC. This may lead to code bloat. For Uint256, there is probably no difference between:
```
foo.hi.hi = not bar.hi.hi
foo.hi.lo = not bar.hi.lo
foo.lo.hi = not bar.lo.hi
foo.lo.lo = not bar.lo.lo
```
And
```
for i in 0 ..< 4:
  foo.wordArray[i] = not bar.wordArray[i]
```
but there Uint2048 used in Ethereum bloom filter might be a bit heavy. GCC has an option to reroll loops if needed.
Iteration is always done from most significant word to least significant word, i.e. there is no ignoreEndianness parameter. On littleEndian, this would iterate backward and may leave performance on the table for proc with no loop order dependency.

For example on old Core architecture, the prefetcher could work with up to 12 forward streams but only 4 backward streams:

on Sandy Bridge, the prefetcher has no preference

.

I am not sure on ARM. Note that the compiler is probably able to reorder operations or use vectorized operations (ARM Neon, AVX2) if there is no loop order dependency so it may not matter at all.

Benchmark

I updated the benchmark to make sure we didn't have a perf regression, the change brings about 15% perf improvement on my machine.

Javascript

Compile-time evaluation works but Javascript display at least doesn't work, uint256 are printed with hundreds of leading zeros.

Nim issues

Working with types in macro is a pain, especially this bug nim-lang/Nim#7737. There are also many tricks to know about to get proper type resolution, like storing idents in a nnkArgList and not nnkBracket or nnkPar if we want to pass them to another macro for further processing.

…the workaround (and is tracked in the PR)

coffeepots

LGTM. AsWords is a nice change.

yglukhov

👍

mratsim added 3 commits June 17, 2018 15:48

Initial commit - allow all ops at compile-time on uint.

f53d420

Update benchmark

10fc21f

Delete commented ideal version of asWordsiterate, it is obvious from …

3d1cb3f

…the workaround (and is tracked in the PR)

mratsim requested review from yglukhov, zah and coffeepots June 17, 2018 15:03

coffeepots approved these changes Jun 18, 2018

View reviewed changes

yglukhov approved these changes Jun 18, 2018

View reviewed changes

mratsim merged commit a46c62b into master Jun 18, 2018

mratsim deleted the compile-time-js-compat-v2 branch July 6, 2018 11:34

mratsim mentioned this pull request Aug 24, 2018

Post Mortem for {.experimental: "ForLoopMacros".} status-im/nimbus-eth1#120

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uint - allow compile-time evaluation for all procs #54

Uint - allow compile-time evaluation for all procs #54

mratsim commented Jun 17, 2018 •

edited

Loading

coffeepots left a comment

yglukhov left a comment

	type
	# ### Private ### #
	UintImpl*[N: static[int], T: SomeUnsignedInt] = object
	raw_data*: array[N, T]

	IntImpl*[N: static[int], T: SomeUnsignedInt] = object
	raw_data*: array[N, T]
	# ### Private ### #

	StUint*[bits: static[int]] = object
	data*: stintImpl(bits, UintImpl)

	StInt*[bits: static[int]] = object
	data*: stintImpl(bits, IntImpl)

	AnyImpl*[N: static[int], T: SomeUnsignedInt] = UintImpl[N, T] or IntImpl[N, T]

	# #################################

	func getSize*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): static[int] =
	## Get size of int or uint implementation in bits
	N * T.sizeof * 8

	# ###### lo and hi accessors ######
	# macro used as workaround because template crashes - https://github.com/nim-lang/Nim/issues/8052

	macro loImpl(dst, src: untyped, N: static[int]): untyped =
	assert N >= 2

	result = quote do:
	const halfSize = `N` div 2
	when cpuEndian == littleEndian:
	for i in 0 ..< halfSize:
	{.unroll.}
	`dst`.raw_data[i] = `src`.raw_data[i]
	else:
	for i in halfSize ..< `N`:
	{.unroll.}
	`dst`.raw_data[i] = `src`.raw_data[i]

	macro hiImpl(dst, src: untyped, N: static[int]): untyped =
	assert N >= 2

	result = quote do:
	const halfSize = `N` div 2
	when cpuEndian == littleEndian:
	for i in halfSize ..< `N`:
	{.unroll.}
	`dst`.raw_data[i] = `src`.raw_data[i]
	else:
	for i in 0 ..< halfSize:
	{.unroll.}
	`dst`.raw_data[i] = `src`.raw_data[i]

	proc loProc[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N, T]): AnyImpl[N div 2, T] {.inline.}=
	loImpl(result, x, N)

	proc hiProc[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N, T]): AnyImpl[N div 2, T] {.inline.}=
	hiImpl(result, x, N)

	proc loProc[N: static[int], T: SomeUnsignedInt](x: var AnyImpl[N, T]): var AnyImpl[N div 2, T] {.inline.}=
	loImpl(result, x, N)

	proc hiProc[N: static[int], T: SomeUnsignedInt](x: var AnyImpl[N, T]): var AnyImpl[N div 2, T] {.inline.}=
	hiImpl(result, x, N)

	macro lo*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): untyped=
	## Get the low part of an unsigned integer
	# TODO: as this is called extensively and it is not a field
	# we should make sure repeated calls are optimized away by the compiler
	if N == 1:
	result = quote do: `x`.raw_data[0]
	elif N > 1:
	let loProc = bindSym"loProc"
	result = quote do: `loProc`(`x`)
	else:
	result = quote do: {.fatal: "Unreachable".}

	macro `lo=`*[N: static[int], T: SomeUnsignedInt](dst: var AnyImpl[N,T], src: AnyImpl[N div 2, T]): untyped =
	## Get the mutable part of an unsigned integer
	if N == 1:
	result = quote do: `dst`.raw_data[0] = `src`.raw_data[0]
	elif N > 1:
	result = getAST(loImpl(dst, src, N))
	else:
	result = quote do: {.fatal: "Unreachable".}

	macro hi*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): untyped =
	## Get the high part of an unsigned integer
	# TODO: as this is called extensively and it is not a field
	# we should make sure repeated calls are optimized away by the compiler
	if N == 1:
	result = quote do: `x`.raw_data[0]
	elif N > 1:
	let hiProc = bindSym"hiProc"
	result = quote do: `hiProc`(`x`)
	else:
	result = quote do: {.fatal: "Unreachable".}

	macro `hi=`*[N: static[int], T: SomeUnsignedInt](dst: var AnyImpl[N,T], src: AnyImpl[N div 2, T]): untyped =
	## Get the high part of an unsigned integer
	if N == 1:
	result = quote do: `dst`.raw_data[0] = `src`.raw_data[0]
	elif N > 1:
	result = getAST(hiImpl(dst, src, N))
	else:
	result = quote do: {.fatal: "Unreachable".}

Uint - allow compile-time evaluation for all procs #54

Uint - allow compile-time evaluation for all procs #54

Conversation

mratsim commented Jun 17, 2018 • edited Loading

Implementation

What didn't work

What worked

Reserves

Benchmark

Javascript

Nim issues

coffeepots left a comment

Choose a reason for hiding this comment

yglukhov left a comment

Choose a reason for hiding this comment

mratsim commented Jun 17, 2018 •

edited

Loading