diff --git a/include/tvm/relay/attrs/random.h b/include/tvm/relay/attrs/random.h new file mode 100644 index 000000000000..8238f102dab8 --- /dev/null +++ b/include/tvm/relay/attrs/random.h @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/relay/attrs/random.h + * \brief Auxiliary attributes for random operators. 
+ */ +#ifndef TVM_RELAY_ATTRS_RANDOM_H_ +#define TVM_RELAY_ATTRS_RANDOM_H_ + +#include <tvm/ir/attrs.h> + +namespace tvm { +namespace relay { + +/*! \brief Attributes used in the threefry_generate operator */ +struct ThreefryGenerateAttrs : public tvm::AttrsNode<ThreefryGenerateAttrs> { + Array<Integer> out_shape; + + TVM_DECLARE_ATTRS(ThreefryGenerateAttrs, "relay.attrs.ThreefryGenerateAttrs") { + TVM_ATTR_FIELD(out_shape).describe("Shape of random numbers to generate"); + } +}; + +} // namespace relay +} // namespace tvm +#endif // TVM_RELAY_ATTRS_RANDOM_H_ diff --git a/python/tvm/relay/__init__.py b/python/tvm/relay/__init__.py index cd96ecc7ee33..97f6d1cb60c0 100644 --- a/python/tvm/relay/__init__.py +++ b/python/tvm/relay/__init__.py @@ -45,6 +45,7 @@ from .op import vision from .op import contrib from .op import dyn +from .op import random from .op.reduce import * from .op.tensor import * from .op.transform import * diff --git a/python/tvm/relay/op/__init__.py b/python/tvm/relay/op/__init__.py index f6afa443d280..1f267abedc1a 100644 --- a/python/tvm/relay/op/__init__.py +++ b/python/tvm/relay/op/__init__.py @@ -43,6 +43,7 @@ from . import image from . import vision from . import op_attrs +from . import random # operator registry diff --git a/python/tvm/relay/op/algorithm.py b/python/tvm/relay/op/algorithm.py index 99140fcb3e11..6fd5c0645eed 100644 --- a/python/tvm/relay/op/algorithm.py +++ b/python/tvm/relay/op/algorithm.py @@ -17,9 +17,9 @@ """Classic algorithm operation""" from __future__ import absolute_import as _abs +from ..expr import Constant, Expr, TupleWrapper from . 
import _make from .dyn import _make as _dyn_make -from ..expr import TupleWrapper, Expr, Constant def sort(data, axis=-1, is_ascend=1): diff --git a/python/tvm/relay/op/op_attrs.py b/python/tvm/relay/op/op_attrs.py index cb837b192a6c..41076817b374 100644 --- a/python/tvm/relay/op/op_attrs.py +++ b/python/tvm/relay/op/op_attrs.py @@ -552,3 +552,8 @@ class SpaceToBatchNDAttrs(Attrs): @tvm._ffi.register_object("relay.attrs.BatchToSpaceNDAttrs") class BatchToSpaceNDAttrs(Attrs): """Attributes used in BatchToSpaceNDAttrs operators""" + + +@tvm._ffi.register_object("relay.attrs.ThreefryGenerateAttrs") +class ThreefryGenerateAttrs(Attrs): + """Attributes used in ThreefryGenerateAttrs operators""" diff --git a/python/tvm/relay/op/random/__init__.py b/python/tvm/relay/op/random/__init__.py new file mode 100644 index 000000000000..8366f4a06dac --- /dev/null +++ b/python/tvm/relay/op/random/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=wildcard-import +"""PRNG related operators.""" +from .kernel import * +from . 
import _kernel diff --git a/python/tvm/relay/op/random/_kernel.py b/python/tvm/relay/op/random/_kernel.py new file mode 100644 index 000000000000..8be3397008d5 --- /dev/null +++ b/python/tvm/relay/op/random/_kernel.py @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Splittable and parallelizable PRNG kernels.""" +# pylint: disable=invalid-name,unused-argument +from __future__ import absolute_import + +from .. import strategy +from ..op import register_strategy, register_pattern, OpPattern + + +# Threefry +register_strategy("random.threefry_generate", strategy.threefry_generate_strategy) +register_pattern("random.threefry_generate", OpPattern.OPAQUE) +register_strategy("random.threefry_split", strategy.threefry_split_strategy) +register_pattern("random.threefry_split", OpPattern.OPAQUE) diff --git a/python/tvm/relay/op/random/_make.py b/python/tvm/relay/op/random/_make.py new file mode 100644 index 000000000000..51a8a6aa9339 --- /dev/null +++ b/python/tvm/relay/op/random/_make.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +import tvm._ffi + +tvm._ffi._init_api("relay.op.random._make", __name__) diff --git a/python/tvm/relay/op/random/kernel.py b/python/tvm/relay/op/random/kernel.py new file mode 100644 index 000000000000..96634943128d --- /dev/null +++ b/python/tvm/relay/op/random/kernel.py @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Splittable and parallelizable PRNG kernels.""" +# pylint: disable=invalid-name,unused-argument +from __future__ import absolute_import + +import sys +import numpy as np + +from ...expr import Constant +from .... import nd +from . 
import _make + + +def threefry_key(seed): + """Create a new Threefry random number generator key. + + Example + ------- + + .. code-block:: python + + gen = threefry_key(0) + _, random_number = threefry_generate(gen, (4,)) + + Parameters + ---------- + seed : int + Starting seed for the key. Must be a non-negative integer that fits in 32 bytes, because it is serialized with ``seed.to_bytes(32, sys.byteorder)``. + + Returns + ------- + key : relay.Expr + New key to pass to future uses of :py:func:`threefry_split` or + :py:func:`threefry_generate`. + """ + s = np.frombuffer(seed.to_bytes(32, sys.byteorder), dtype="uint64") + a = np.concatenate((s, np.array([0, 0, 0, 0, 1 << 63, 0], dtype="uint64"))) + return Constant(nd.array(a)) + + +def threefry_generate(key, shape): + """Generate an array of random bits (`uint64`) using the Threefry algorithm + + Example + ------- + + .. code-block:: python + + key = threefry_key(0) + new_key, random1 = threefry_generate(key, (4,)) + _, random2 = threefry_generate(new_key, (4,)) + # random1 and random2 are different random numbers + + Parameters + ---------- + key : relay.Expr + key that uniquely determines the random values. Multiple uses with the + same key will generate the same random values. This key should be + treated as an opaque pointer. You can create one from calling + :py:func:`threefry_key`, :py:func:`threefry_split`, or + :py:func:`threefry_generate`. **Do not use this key again after calling + this function.** + + shape : Sequence[int] + Desired output shape of random numbers. **Currently the total + number of elements must be a multiple of 4.** + + Returns + ------- + new_key : relay.Expr + New key to pass to future uses of :py:func:`threefry_split` or + :py:func:`threefry_generate`. + + random_array : relay.Expr + Array of random numbers. Has shape `shape`. + """ + return _make.threefry_generate(key, shape) + + +def threefry_split(key): + """Split an existing Threefry key into two new ones. + + This is useful if you have two subsequent calls which each need their own + independent random number generation. + + Example + ------- + + .. 
code-block:: python + + def foo(key): + new_key, num = threefry_generate(key, (4,)) + return num + + key = threefry_key(0) + key1, key2 = threefry_split(key) + assert foo(key1) != foo(key2) + + Parameters + ---------- + key : relay.Expr + key that uniquely determines the random values. Multiple uses with the + same generator will generate the same random values. This generator should be + treated as an opaque pointer. You can create one from calling + :py:func:`threefry_key`, :py:func:`threefry_split`, or + :py:func:`threefry_generate`. **Do not use this generator again after calling + this function.** + + Returns + ------- + new_key_1 : relay.Expr + New key to pass to future uses of :py:func:`threefry_split` or + :py:func:`threefry_generate`. + + new_key_2 : relay.Expr + New key to pass to future uses of :py:func:`threefry_split` or + :py:func:`threefry_generate`. + """ + return _make.threefry_split(key) diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 107534c9b530..fa4853041dfa 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1325,3 +1325,47 @@ def argwhere_strategy(attrs, inputs, out_type, target): name="argwhere.generic", ) return strategy + + +# threefry_generate +def wrap_compute_threefry_generate(topi_compute): + """Wrap threefry_generate topi compute""" + + def _compute_threefry_generate(attrs, inputs, _): + return topi_compute(inputs[0], attrs.out_shape) + + return _compute_threefry_generate + + +@override_native_generic_func("threefry_generate_strategy") +def threefry_generate_strategy(attrs, inputs, out_type, target): + """threefry_generate generic strategy""" + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_threefry_generate(topi.random.threefry_generate), + wrap_topi_schedule(topi.generic.schedule_extern), + name="threefry_generate.generic", + ) + return strategy + + +# threefry_split +def 
wrap_compute_threefry_split(topi_compute): + """Wrap threefry_split topi compute""" + + def _compute_threefry_split(attrs, inputs, _): + return topi_compute(inputs[0]) + + return _compute_threefry_split + + +@override_native_generic_func("threefry_split_strategy") +def threefry_split_strategy(attrs, inputs, out_type, target): + """threefry_split generic strategy""" + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_threefry_split(topi.random.threefry_split), + wrap_topi_schedule(topi.generic.schedule_extern), + name="threefry_split.generic", + ) + return strategy diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py index 555717854ed6..05dd36a952f6 100644 --- a/python/tvm/topi/__init__.py +++ b/python/tvm/topi/__init__.py @@ -55,6 +55,7 @@ from . import image from . import sparse from . import hls +from . import random # error reporting from .utils import InvalidShapeError diff --git a/python/tvm/topi/random/__init__.py b/python/tvm/topi/random/__init__.py new file mode 100644 index 000000000000..ee8d1d6385b7 --- /dev/null +++ b/python/tvm/topi/random/__init__.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# pylint: disable=wildcard-import +"""Pseudorandom generator kernels and operators.""" +from __future__ import absolute_import + +from .kernel import * diff --git a/python/tvm/topi/random/kernel.py b/python/tvm/topi/random/kernel.py new file mode 100644 index 000000000000..576fd9254a79 --- /dev/null +++ b/python/tvm/topi/random/kernel.py @@ -0,0 +1,410 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Pseudorandom number kernels.""" +import tvm +import tvm.topi +from ... import tir +from ...tir import ir_builder + + +# Threefry PRNG with splitting based on +# - J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, +# 2, 3," SC '11: Proceedings of 2011 International Conference for High Performance Computing, +# Networking, Storage and Analysis, Seattle, WA, 2011, pp. 1-12, doi: 10.1145/2063384.2063405. +# - Claessen, K. ; Palka, M. (2013) "Splittable Pseudorandom Number Generators using Cryptographic +# Hashing". Proceedings of Haskell Symposium 2013 pp. 47-58. MLA +# - Ferguson, Niels, et al. "The Skein hash function family." Submission to NIST (round 3) 7.7.5 +# (2010): 3. + + +# Threefry is a counter based PRNG: given a unique input, it generates a unique random number. 
As +# there is no state to maintain, we can apply it to a sequence of numbers (0..N) to generate a +# sequence of random numbers in parallel. In order to make the PRNG splittable (that is, we can +# generate a sequence of random numbers in one place, and another sequence in another), we add a +# path and key in addition to the counter. The path allows us to encode a sequence of splits (a 0 in +# the path indicates the left result of a split, a 1 indicates the right). To avoid continuously +# growing the path, we can compress an existing path into the key portion of the generator by +# hashing the current key, path, and counter to create the new key (this same technique is used if +# we run out of room for the counter). The key is initialized with a unique initial state. +# +# Random numbers are generated by applying the Threefry hash to the current key, path, and counter. + +# This module uses encoding e4 from the appendix of "Splittable Pseudorandom Number Generators using +# Cryptographic Hashing" (confusingly, the definition in the paper uses e3 to define the encoding +# function). This encoding uses a 10 element uint64 tensor where each word means the following: + +# .. code-block: + +# gen: +# words: 0 1 2 3 | 4 5 | 6 7 | 8 9 +# usage: key | path | counter | position of next step in path encoded in binary +# ex: 0b00010 -> next path entry goes one from the right + +# Right now, counter only uses the rightmost word. 
+ +# Threefry rotation constants from the Skein paper ("The Skein Hash Function Family" +# https://www.schneier.com/wp-content/uploads/2015/01/skein.pdf) +_ROTATIONS = { + 4: [[14, 16], [52, 57], [23, 40], [5, 37], [25, 33], [46, 12], [58, 22], [32, 32]], + 8: [ + [46, 36, 19, 37], + [33, 27, 14, 42], + [17, 49, 36, 39], + [44, 9, 54, 56], + [39, 30, 34, 24], + [13, 50, 10, 17], + [25, 29, 39, 43], + [8, 35, 56, 22], + ], + 16: [ + [24, 13, 8, 47, 8, 17, 22, 37], + [38, 19, 10, 55, 49, 18, 23, 52], + [33, 4, 51, 13, 34, 41, 59, 17], + [5, 20, 48, 41, 47, 28, 16, 25], + [41, 9, 37, 31, 12, 47, 44, 30], + [16, 34, 56, 51, 4, 53, 42, 41], + [31, 44, 47, 46, 19, 42, 44, 25], + [9, 48, 35, 52, 23, 31, 37, 20], + ], +} + +# Threefry permutation constants from the Skein paper ("The Skein Hash Function Family" +# https://www.schneier.com/wp-content/uploads/2015/01/skein.pdf) +_PERMUTATIONS = { + 4: [0, 3, 2, 1], + 8: [2, 1, 4, 7, 6, 5, 0, 3], + 16: [0, 9, 2, 13, 6, 11, 4, 15, 10, 7, 12, 3, 14, 5, 8, 1], +} + + +def _threefry( + irb, key_buf, key_offset, counter_buf, counter_offset, out_buf, out_offset, out_shape +): + """IRBuilder code for running Threefry + + Parameters + ---------- + irb: IRBuilder + IRBuilder that this code will be generated for. + + key_buf: BufferVar + Buffer to read the key from. + + key_offset: number + Threefry will write to :code:`key_buf[key_offset:key_offset+4]` + + counter_buf: BufferVar + Buffer to read the counter from. + + counter_offset: number + Threefry will write to :code:`counter_buf[counter_offset:counter_offset+4]` + + out_buf: BufferVar + Buffer to read the counter from. + + out_offset: number + Threefry will write to :code:`out_buf[out_offset:out_offset+4*product(out_shape)]` + + out_shape: number + Determines the number of ouput states to generate. :code:`state[i]` will correspond to + counter+i. 
+ """ + nrounds = 20 + nwords = 4 + iwidth = 64 + assert nrounds % 4 == 0 + assert nwords in [4, 8, 16] + + # The paper has constants for 32 bit threefry, but we keep the implementation simple by only + # using 64-bit words. + assert key_buf.dtype == "uint64", "threefry only supports 64-bit keys" + assert key_buf.dtype == counter_buf.dtype, "threefry key and counter must be the same dtype" + + def mix(a, b, rotation): + x = a + b # TODO should be wrapping + y = x ^ ((b << rotation) | (b >> (iwidth - rotation))) + return [x, y] + + # temporary buffer for holding the results of _PERMUTATIONS + tmp = irb.allocate(out_buf.dtype, out_shape, name="tmp", scope="global") + tmp_offset = 0 + + # Initialize entire key. It is composed of the original key with one + # element appended. The appended element is the xor of all key words plus a + # constant. + full_key = irb.allocate("uint64", nwords + 1, name="full_key", scope="global") + for i in range(nwords): + full_key[i] = key_buf[key_offset + i] + # initial key constant, full_key[nwords] is equivalent to k_{N_W} in the Skein paper. 
+ full_key[nwords] = tvm.tir.const(0x1BD11BDAA9FC1A22, dtype="uint64") + for i in range(nwords): + full_key[nwords] ^= key_buf[key_offset + i] + + with irb.for_range(0, out_shape, dtype="uint64", name="i") as i: + for j in range(nwords): + out_buf[out_offset + i * nwords + j] = counter_buf[counter_offset + j] + i + + def key_schedule(s, i): + # Threefry uses no tweak, so the key schedule is simple + if i == nwords - 1: + return full_key[(s + i) % (nwords + 1)] + tvm.tir.const(s, dtype="uint64") + return full_key[(s + i) % (nwords + 1)] + + with irb.for_range(0, out_shape, name="l") as l: # pylint: disable=invalid-name + for i in range(nrounds // 4): + for j in range(nwords): + out_buf[out_offset + l * nwords + j] += key_schedule(i, j) # TODO wrapping + for k in range(4): + for j in range(nwords // 2): + ( + out_buf[out_offset + l * nwords + j * 2 + 0], + out_buf[out_offset + l * nwords + j * 2 + 1], + ) = mix( + out_buf[out_offset + l * nwords + j * 2 + 0], + out_buf[out_offset + l * nwords + j * 2 + 1], + _ROTATIONS[nwords][(i * 4 + k) % 8][j], + ) + for j in range(nwords): + tmp[tmp_offset + l * nwords + j] = out_buf[ + out_offset + l * nwords + _PERMUTATIONS[nwords][j] + ] + # number of rounds is even, so out always contains the result + (out_buf, tmp) = (tmp, out_buf) + (out_offset, tmp_offset) = (tmp_offset, out_offset) + + +def threefry_generate(gen, out_shape): + """Generate a series of random values + + Notes + ----- + This function uses the counter portion of the generator state to generate a series of random + numbers in parallel. Random number `i` is generated by applying Threefry to the current + generator state with the counter portion incremented by `i`. This means that each random number + is generated independently from each other random number, so we can compute them in parallel. 
+ + If there is not enough room left in the counter to generate the desired shape of random values, + then a new generator is created by applying Threefry to the current key, path, and counter. + This new generator will have a reset counter. + + Parameters + ---------- + gen : Tensor[10, uint64] + Generator state. Can be create with :py:func:`tvm.relay.threefry_key`. This should not be + reused in another function, otherwise random numbers will be repeated. + + out_shape : Sequence[int] + Output shape of the random numbers. Product of all dimensions must be a multiple of 4. + + Returns + ------- + new_gen : Tensor[10, uint64] + The new generator state to be used in subsequent calls. + + rand : Tensor[out_shape, uint64] + Tensor of random numbers with shape `out_shape`. + """ + out_len = tir.const(1) + for s in out_shape: + out_len *= s + assert ( + out_len.value % 4 == 0 + ), f"Threefry can only generate arrays who's size is a multiple of 4 ({out_len} was provided)." + assert ( + out_len.value <= 2 ** 64 - 1 + ), f"Can only generate up to 2^64 random numbers, but {out_len} were requested." + + def gen_ir(gen_ptr, out_gen_ptr, out_array_ptr): + irb = ir_builder.create() + gen = irb.buffer_ptr(gen_ptr) + out_gen = irb.buffer_ptr(out_gen_ptr) + out_array = irb.buffer_ptr(out_array_ptr) + + # Create a temporary array to hold the generator state we will use to create the random + # numbers. We cannot use gen because we may need to update the key + path if there is not + # enough room in the counter. + tmp = irb.allocate(gen.dtype, 10, name="tmp", scope="global") + + # TODO(tkonolige): for now we only use the last word of the counter for counting. It is too + # much work to figure out how to do 128 bit addition. + + # Max value for counter should be 2**64-2 because we need to reserve a special value to + # indicate the counter is used up. 
+ with irb.if_scope(gen[7] < tir.const(2 ** 64 - 1, dtype=gen.dtype) - out_len): + for i in range(10): + tmp[i] = gen[i] + with irb.else_scope(): + # no room left in the counter, we have to change the path or key + with irb.if_scope(gen[8] == 0 and gen[9] == 0): + # out of room in the path, have to generate new key + + # The paper says the counter that we will be hashing should be a special value of + # all ones. We need to allocate some space for it because we cannot overwrite gen. + tmp_counter = irb.allocate(gen.dtype, 2, name="tmp_counter", scope="global") + tmp_counter[0] = tir.const(0xFFFFFFFFFFFFFFFF, dtype=gen.dtype) + tmp_counter[1] = tir.const(0xFFFFFFFFFFFFFFFF, dtype=gen.dtype) + _threefry(irb, gen, 0, tmp_counter, 0, tmp, 0, 1) + tmp[4] = tir.const(0, dtype=gen.dtype) # zero path, i.e. no path + tmp[5] = tir.const(0, dtype=gen.dtype) + tmp[6] = tir.const(0, dtype=gen.dtype) # zero counter + tmp[7] = tir.const(0, dtype=gen.dtype) + tmp[8] = tir.const(1 << 63, dtype=gen.dtype) # one in the leftmost position + tmp[9] = tir.const(0, dtype=gen.dtype) + with irb.else_scope(): + tmp[0] = gen[0] + tmp[1] = gen[1] + tmp[2] = gen[2] + tmp[3] = gen[3] + tmp[4] = gen[4] | gen[8] # add a 1 to the path + tmp[5] = gen[5] | gen[9] + tmp[6] = tir.const(0, dtype=gen.dtype) # zero counter + tmp[7] = tir.const(0, dtype=gen.dtype) + _shift_right(irb, gen[8], gen[9], tmp, 8, tmp, 9) + + # Compute random values + _threefry(irb, tmp, 0, tmp, 4, out_array, 0, out_len // 4) + + # Update generator state + out_gen[0] = tmp[0] # key stays the same + out_gen[1] = tmp[1] + out_gen[2] = tmp[2] + out_gen[3] = tmp[3] + out_gen[4] = tmp[4] # path stays the same + out_gen[5] = tmp[5] + out_gen[6] = tir.const(0, dtype=gen.dtype) # unused, leave it as 0 + out_gen[7] = tmp[7] + tir.Cast(gen.dtype, out_len) # increment counter + out_gen[8] = tmp[8] # path unchanged, so no update here + out_gen[9] = tmp[9] + + return irb.get() + + out_gen = tvm.tir.decl_buffer((10,), name="out_gen", 
dtype="uint64") + out_array = tvm.tir.decl_buffer(out_shape, name="out_array", dtype="uint64") + return tvm.te.extern( + [out_gen.shape, out_array.shape], + [gen], + lambda ins, outs: gen_ir(ins[0], outs[0], outs[1]), + out_buffers=[out_gen, out_array], + name="threefry_generate", + tag="threefry_generate", + ) + + +def _shift_right(irb, a, b, out_a, a_off, out_b, b_off): + """Binary shift a 128bit number composed of two 64 bit words right by one.""" + with irb.if_scope(a == 1): + out_a[a_off] = tir.const(0, dtype=a.dtype) + out_b[b_off] = tir.const(0x8000000000000000, dtype=a.dtype) + with irb.else_scope(): + with irb.if_scope(a == 0): + out_a[a_off] = tir.const(0, dtype=a.dtype) + out_b[b_off] = b >> 1 + with irb.else_scope(): + out_a[a_off] = a >> 1 + out_b[b_off] = tir.const(0, dtype=a.dtype) + + +def threefry_split(gen): + """Split a single generator state into two new ones + + Notes + ----- + The new generator is created by appending a one (for the right output) or a zero (for the left + output) to the end of the path portion of the generator If there is no longer and room in the + path, then we create a new key portion of the generator by applying Threefry to the old state, + path, and counter. i.e. :code:`new_key = threefry(old_key, [old_path, old_counter])`. This + resets the path portion of the new generator. + + Parameters + ---------- + gen : Tensor[10, uint64] + Generator state. Can be create with :py:func:`tvm.relay.threefry_key`. This should not be + reused in another function, otherwise random numbers will be repeated. + + Returns + ------- + out_gen_left : Tensor[10, uint64] + New generator state that is distinct from `out_gen_right`. + + out_gen_right : Tensor[10, uint64] + New generator state that is distinct from `out_gen_left`. 
+ """ + + def gen_ir(gen_ptr, out_left_ptr, out_right_ptr): + irb = ir_builder.create() + gen = irb.buffer_ptr(gen_ptr) + out_left = irb.buffer_ptr(out_left_ptr) + out_right = irb.buffer_ptr(out_right_ptr) + + with irb.if_scope(gen[8] == 0 and gen[9] == 0): + # Generate new key because we have run out of room to extend the path + _threefry(irb, gen, 0, gen, 4, out_left, 0, 1) + out_left[4] = tir.const(0, dtype=gen.dtype) + out_left[5] = tir.const(0, dtype=gen.dtype) + out_left[6] = tir.const(0, dtype=gen.dtype) # counter gets zeroed + out_left[7] = tir.const(0, dtype=gen.dtype) # counter gets zeroed + out_left[8] = tir.const( + 1 << 62, dtype=gen.dtype + ) # one in the second from the leftmost position + out_left[9] = tir.const(0, dtype=gen.dtype) + + out_right[0] = out_left[0] + out_right[1] = out_left[1] + out_right[2] = out_left[2] + out_right[3] = out_left[3] + out_right[4] = tir.const(1 << 63, dtype=gen.dtype) # one in the leftmost position + out_right[5] = tir.const(0, dtype=gen.dtype) + out_right[6] = tir.const(0, dtype=gen.dtype) + out_right[7] = tir.const(0, dtype=gen.dtype) + out_right[8] = tir.const( + 1 << 62, dtype=gen.dtype + ) # one in the second from the leftmost position + out_right[9] = tir.const(0, dtype=gen.dtype) + with irb.else_scope(): + out_left[0] = gen[0] + out_left[1] = gen[1] + out_left[2] = gen[2] + out_left[3] = gen[3] + out_left[4] = gen[4] # adding a zero here, but its already zero padded + out_left[5] = gen[5] + out_left[6] = gen[6] + out_left[7] = gen[7] + # move path position over one bit + _shift_right(irb, gen[8], gen[9], out_left, 8, out_left, 9) + + out_right[0] = gen[0] + out_right[1] = gen[1] + out_right[2] = gen[2] + out_right[3] = gen[3] + out_right[4] = gen[4] | gen[8] # add a one to the path + out_right[5] = gen[5] | gen[9] + out_right[6] = gen[6] + out_right[7] = gen[7] + _shift_right(irb, gen[8], gen[9], out_right, 8, out_right, 9) + + return irb.get() + + out_left = tvm.tir.decl_buffer((10,), name="out_left", 
dtype="uint64") + out_right = tvm.tir.decl_buffer((10,), name="out_right", dtype="uint64") + return tvm.te.extern( + [out_left.shape, out_right.shape], + [gen], + lambda ins, outs: gen_ir(ins[0], outs[0], outs[1]), + out_buffers=[out_left, out_right], + name="threefry_split", + tag="threefry_split", + ) diff --git a/src/relay/op/random/kernel.cc b/src/relay/op/random/kernel.cc new file mode 100644 index 000000000000..ec092a7e05f2 --- /dev/null +++ b/src/relay/op/random/kernel.cc @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <tvm/relay/attrs/random.h> +#include <tvm/relay/op.h> + +namespace tvm { +namespace relay { + +TVM_REGISTER_NODE_TYPE(ThreefryGenerateAttrs); + +// Type of a Threefry key: a 10 element uint64 tensor. +static TensorType ThreefryKeyType() { return TensorType({10}, tvm::DataType::UInt(64)); } + +// Type relation for random.threefry_generate: the input is a key and the output is a tuple of +// (next key, uint64 tensor of shape attrs->out_shape). +bool ThreefryGenerateRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + const ThreefryGenerateAttrs* param = attrs.as<ThreefryGenerateAttrs>(); + ICHECK(param != nullptr) << "ThreefryGenerate requires ThreefryGenerateAttrs"; + ICHECK_EQ(types.size(), 2) << "ThreefryGenerate should have one input and one output"; + + reporter->Assign(types[0], ThreefryKeyType()); + + std::vector<IndexExpr> oshape; + for (auto& x : param->out_shape) { + oshape.push_back(x); + } + // generate returns the next key and an array of random values + // TODO(@tkonolige, @altanh): support other output dtypes? + reporter->Assign(types[1], + TupleType({ThreefryKeyType(), TensorType(oshape, tvm::DataType::UInt(64))})); + return true; +} + +Expr MakeThreefryGenerate(Expr key, Array<Integer> out_shape) { + auto attrs = make_object<ThreefryGenerateAttrs>(); + attrs->out_shape = out_shape; + static const Op& op = Op::Get("random.threefry_generate"); + return Call(op, {key}, Attrs(attrs), {}); +} + +TVM_REGISTER_GLOBAL("relay.op.random._make.threefry_generate").set_body_typed(MakeThreefryGenerate); + +RELAY_REGISTER_OP("random.threefry_generate") + .describe( + R"doc(Generate an array of random numbers using the Threefry algorithm.)doc" TVM_ADD_FILELINE) + .set_num_inputs(1) + .set_attrs_type<ThreefryGenerateAttrs>() + .add_argument("key", "Tensor", "Input Threefry key") + .add_type_rel("ThreefryGenerate", ThreefryGenerateRel); + +// Type relation for random.threefry_split: the input is a key and the output is a tuple of two keys. +bool ThreefrySplitRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + ICHECK_EQ(types.size(), 2) << "ThreefrySplit should have one input and one output"; + + reporter->Assign(types[0], ThreefryKeyType()); + reporter->Assign(types[1], TupleType({ThreefryKeyType(), ThreefryKeyType()})); + + return true; +} + +Expr MakeThreefrySplit(Expr key) { + static const Op& op = Op::Get("random.threefry_split"); + return Call(op, {key}, Attrs(), {}); +} + 
// Register MakeThreefrySplit as the global function
// "relay.op.random._make.threefry_split".
TVM_REGISTER_GLOBAL("relay.op.random._make.threefry_split").set_body_typed(MakeThreefrySplit);

// Operator registration: a single "key" argument, typed by ThreefrySplitRel.
RELAY_REGISTER_OP("random.threefry_split")
    .describe(R"doc(Split the input Threefry key into two new ones.)doc" TVM_ADD_FILELINE)
    .set_num_inputs(1)
    .add_argument("key", "Tensor", "Input Threefry key")
    .add_type_rel("ThreefrySplit", ThreefrySplitRel);

}  // namespace relay
}  // namespace tvm
# diff --git a/tests/python/relay/test_prng.py b/tests/python/relay/test_prng.py
# new file mode 100644
# index 000000000000..2109d3b30a82
# --- /dev/null
# +++ b/tests/python/relay/test_prng.py
# @@ -0,0 +1,142 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for the Relay Threefry PRNG operators: generate, split and type inference."""
import pytest
import tvm
import tvm.relay
import tvm.testing
from tvm.relay.testing import run_infer_type


def _evaluate_on_vm(expr, target, ctx):
    """Evaluate ``expr`` with the Relay VM executor and return the result.

    ``expr`` is wrapped in a zero-argument ``relay.Function`` so it can be
    called without inputs.
    """
    mod = tvm.IRModule.from_expr(tvm.relay.Function([], expr))
    return tvm.relay.create_executor("vm", mod, target=target, ctx=ctx).evaluate()()


@tvm.testing.parametrize_targets
def test_threefry_repeatability(target, ctx):
    """Two generates from keys built with the same seed must agree exactly."""
    # Runs on the parametrized target/ctx, like the other parametrized tests in
    # this module, instead of a hard-coded CPU target.
    key1 = tvm.relay.random.threefry_key(1)
    rand1 = tvm.relay.random.threefry_generate(key1, (12,))
    out_key1, out1 = _evaluate_on_vm(rand1, target, ctx)

    key2 = tvm.relay.random.threefry_key(1)
    rand2 = tvm.relay.random.threefry_generate(key2, (12,))
    out_key2, out2 = _evaluate_on_vm(rand2, target, ctx)

    assert (
        out1.asnumpy() == out2.asnumpy()
    ).all(), "Generate on same seed should have the same output random numbers"

    assert (
        out_key1.asnumpy() == out_key2.asnumpy()
    ).all(), "Generate on same seed should have the same next keys"


@tvm.testing.parametrize_targets
def test_threefry_split(target, ctx):
    """Generating from the two halves of a split key must give different numbers."""
    key = tvm.relay.random.threefry_key(1)
    left, right = tvm.relay.TupleWrapper(tvm.relay.random.threefry_split(key), 2)
    _, rand1 = tvm.relay.TupleWrapper(tvm.relay.random.threefry_generate(left, (16,)), 2)
    _, rand2 = tvm.relay.TupleWrapper(tvm.relay.random.threefry_generate(right, (16,)), 2)
    out1, out2 = _evaluate_on_vm(tvm.relay.Tuple((rand1, rand2)), target, ctx)

    assert (
        out1.asnumpy() != out2.asnumpy()
    ).any(), "Generate after split should not have the same output"


@tvm.testing.parametrize_targets
def test_threefry_sequential_generate(target, ctx):
    """Generating again from the key returned by a generate must give new numbers."""
    key = tvm.relay.random.threefry_key(1)
    key, rand1 = tvm.relay.TupleWrapper(tvm.relay.random.threefry_generate(key, (4,)), 2)
    _, rand2 = tvm.relay.TupleWrapper(tvm.relay.random.threefry_generate(key, (4,)), 2)
    out1, out2 = _evaluate_on_vm(tvm.relay.Tuple((rand1, rand2)), target, ctx)

    assert (
        out1.asnumpy() != out2.asnumpy()
    ).any(), "Sequential generates should not have the same output"


def test_threefry_generate_infer():
    """threefry_generate infers to (uint64[10] key, uint64 tensor of the requested shape)."""
    oshape = (12,)
    key_type = tvm.relay.TensorType([10], dtype="uint64")
    gen_type = tvm.relay.TensorType(oshape, dtype="uint64")
    expected_type = tvm.relay.TupleType([key_type, gen_type])

    key = tvm.relay.random.threefry_key(1)
    rand1 = tvm.relay.random.threefry_generate(key, oshape)
    f = run_infer_type(tvm.relay.Function([], rand1))
    assert tvm.ir.structural_equal(f.ret_type, expected_type)


def test_threefry_split_infer():
    """threefry_split infers to a tuple of two uint64[10] keys."""
    key_type = tvm.relay.TensorType([10], dtype="uint64")
    expected_type = tvm.relay.TupleType([key_type, key_type])

    key = tvm.relay.random.threefry_key(1)
    out_keys = tvm.relay.random.threefry_split(key)
    f = run_infer_type(tvm.relay.Function([], out_keys))
    assert tvm.ir.structural_equal(f.ret_type, expected_type)


@pytest.mark.xfail(raises=tvm.error.TVMError)
def test_threefry_generate_infer_fail():
    """Type inference must reject a generate whose key has 9 elements."""
    # xfail: key size should be 10
    fake_key = tvm.relay.const([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype="uint64")
    rand1 = tvm.relay.random.threefry_generate(fake_key, (12,))
    run_infer_type(tvm.relay.Function([], rand1))


@pytest.mark.xfail(raises=tvm.error.TVMError)
def test_threefry_split_infer_fail():
    """Type inference must reject a split whose key has 9 elements."""
    # xfail: key size should be 10
    fake_key = tvm.relay.const([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype="uint64")
    out_keys = tvm.relay.random.threefry_split(fake_key)
    run_infer_type(tvm.relay.Function([], out_keys))


@tvm.testing.requires_llvm
@pytest.mark.xfail(raises=tvm.error.TVMError)
def test_threefry_generate_incorrect_out_size():
    """Generating an output of 5 elements is expected to raise a TVMError."""
    key = tvm.relay.random.threefry_key(1)
    # xfail: output size should be multiple of 4
    _, rand1 = tvm.relay.TupleWrapper(tvm.relay.random.threefry_generate(key, (5,)), 2)
    # NOTE(review): only rand1 is evaluated, so the two-name unpack below looks
    # unintended; the error is presumably raised before it is reached -- confirm.
    out1, out2 = _evaluate_on_vm(rand1, tvm.target.Target("llvm"), tvm.context("cpu"))


if __name__ == "__main__":
    test_threefry_repeatability(tvm.target.Target("llvm"), tvm.context("cpu"))
    test_threefry_split(tvm.target.Target("llvm"), tvm.context("cpu"))
    test_threefry_sequential_generate(tvm.target.Target("llvm"), tvm.context("cpu"))
# diff --git a/tests/python/topi/python/test_topi_prng.py b/tests/python/topi/python/test_topi_prng.py
# new file mode 100644
# index 000000000000..43b0494ee6f5
# --- /dev/null
# +++ b/tests/python/topi/python/test_topi_prng.py
# @@ -0,0 +1,116 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for the TOPI Threefry PRNG kernels (``threefry_split`` / ``threefry_generate``)."""
import tvm
import tvm.relay
import tvm.testing
import tvm.topi
import numpy as np


def _build_and_run(gen, make_outputs, second_shape):
    """Build a two-output Threefry extern op and run it on ``gen``.

    Parameters
    ----------
    gen : numpy.ndarray
        Input generator state; its shape is used for the placeholder and for
        the first output buffer.
    make_outputs : callable
        Maps the ``gen`` placeholder to the two output tensors of the op.
    second_shape : tuple
        Shape of the second output buffer.

    Returns
    -------
    tuple of numpy.ndarray
        The two outputs, in the order the op declares them.
    """
    gen_placeholder = tvm.te.placeholder(gen.shape, name="gen", dtype="uint64")
    first_placeholder, second_placeholder = make_outputs(gen_placeholder)
    s = tvm.topi.generic.schedule_extern([first_placeholder, second_placeholder])
    f = tvm.build(s, [gen_placeholder, first_placeholder, second_placeholder])
    first = tvm.nd.array(np.zeros(gen.shape, dtype="uint64"))
    second = tvm.nd.array(np.zeros(second_shape, dtype="uint64"))
    f(tvm.nd.array(gen), first, second)
    return first.asnumpy(), second.asnumpy()


def threefry_split(target, ctx, gen):
    """Run ``tvm.topi.random.threefry_split`` on ``gen``; return (left, right) as numpy arrays.

    ``target`` and ``ctx`` are accepted but not used: no target is passed to
    ``tvm.build`` and the arrays are created without an explicit context.
    NOTE(review): presumably the kernel is CPU-only for now -- confirm.
    """
    return _build_and_run(gen, tvm.topi.random.threefry_split, gen.shape)


def threefry_generate(target, ctx, gen, size):
    """Run ``tvm.topi.random.threefry_generate``; return (next gen, random values) as numpy arrays.

    ``size`` is the shape of the random output. ``target`` and ``ctx`` are
    accepted but not used, for the same reason as in ``threefry_split``.
    """
    return _build_and_run(
        gen, lambda placeholder: tvm.topi.random.threefry_generate(placeholder, size), size
    )


@tvm.testing.parametrize_targets
def test_threefry_split(target, ctx):
    """Check split outputs, long split chains, and determinism of split/generate."""
    # test that results of split do not equal each other or the input
    gen = tvm.relay.random.threefry_key(0).data.asnumpy()
    a, b = threefry_split(target, ctx, gen)
    assert (a != b).any() and (
        a != gen
    ).any(), "Splitting a gen should result in different output gens"
    # unittest some split inputs
    assert (a == np.array([0, 0, 0, 0, 0, 0, 0, 0, 1 << 62, 0], dtype="uint64")).all()
    assert (b == np.array([0, 0, 0, 0, 1 << 63, 0, 0, 0, 1 << 62, 0], dtype="uint64")).all()

    # test enough splits to go over path length
    for _ in range(129):
        a, b = threefry_split(target, ctx, b)
    assert (a[0:4] == b[0:4]).all(), "State part of split should be the same"
    assert (b[0:4] != np.zeros(4, dtype="uint64")).any()

    # check that split then generate does not generate the same for both sides
    a, a_rands = threefry_generate(target, ctx, a, (100,))
    b, b_rands = threefry_generate(target, ctx, b, (100,))
    assert (
        a_rands != b_rands
    ).all(), "Numbers generated from different initial states should be different"

    # check repeatability
    _, rands1 = threefry_generate(target, ctx, a, (100,))
    _, rands2 = threefry_generate(target, ctx, a, (100,))
    assert (
        rands1 == rands2
    ).all(), "Numbers generated from the same initial state should be the same"

    a1, b1 = threefry_split(target, ctx, a)
    a2, b2 = threefry_split(target, ctx, a)
    assert (a1 == a2).all() and (
        b1 == b2
    ).all(), "Split called on the same input should return the same result"


@tvm.testing.parametrize_targets
def test_threefry_generate(target, ctx):
    """Check output shape, generator advance, and counter-overflow handling of generate."""
    gen = tvm.relay.random.threefry_key(0).data.asnumpy()

    # check that we can generate some data
    a, rands = threefry_generate(target, ctx, gen, (100,))
    assert (
        rands.shape[0] == 100 and len(rands.shape) == 1
    ), "Output shape should match requested shape"

    # check that gen out does not equal input
    assert (a != gen).any(), "Output generator should be different from input generator"

    # test enough generates to go over generate limit
    gen = np.array(
        [0, 0, 0, 0, 0, 0, 0, 2 ** 64 - 2, 1 << 63, 0], dtype="uint64"
    )  # make counter large
    a, rands = threefry_generate(target, ctx, gen, (100,))
    assert gen[4] != a[4], "Overflow of counter should trigger path change"
    assert a[7] == 100, "Overflow of counter should still update counter"

    # check generate with path at length limit
    gen = np.array([0, 0, 0, 0, 0, 0, 0, 2 ** 64 - 2, 0, 0], dtype="uint64")  # make counter large
    a, rands = threefry_generate(target, ctx, gen, (100,))
    assert (
        gen[0:4] != a[0:4]
    ).any(), "Overflowing counter with no space left in path should change state"


if __name__ == "__main__":
    test_threefry_split(tvm.target.Target("llvm"), tvm.context("cpu"))
    test_threefry_generate(tvm.target.Target("llvm"), tvm.context("cpu"))