Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
patrick-schultz committed Oct 18, 2022
1 parent 5e05d33 commit 466aa3c
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 19 deletions.
2 changes: 2 additions & 0 deletions hail/python/hail/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def init(doctest_namespace):
# This gets run once per process -- must avoid race conditions
print("setting up doctest...")

hl.init(global_seed=0)

olddir = os.getcwd()
os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)),
"docs"))
Expand Down
12 changes: 7 additions & 5 deletions hail/python/hail/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,15 @@ def __init__(self, log, quiet, append, tmpdir, local_tmpdir, global_seed, backen
' the latest changes weekly.\n')
sys.stderr.write(f'LOGGING: writing to {log}\n')

if global_seed is not None:
# FIXME: print deprecation warning
pass
self._user_specified_rng_nonce = True
if global_seed is None:
if 'rng_nonce' not in backend.get_flags('rng_nonce'):
backend.set_flags({'rng_nonce': hex(Random().randrange(2**64))})
self._user_specified_rng_nonce = False
else:
backend.set_flags(rng_nonce=hex(global_seed))
Env._hc = self

backend.set_flags(rng_nonce=hex(Random().randrange(2**64)))

def initialize_references(self, references, default_reference):
for ref in references:
ReferenceGenome._from_config(ref, True)
Expand Down
36 changes: 24 additions & 12 deletions hail/python/hail/docs/functions/random.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,27 @@ Evaluating the same expression will yield the same value every time, but multipl
calls of the same function will have different results. For example, let `x` be
a random number generated with the function :func:`.rand_unif`:

.. testsetup::
hl.reset_global_randomness()

>>> x = hl.rand_unif(0, 1)

The value of `x` will not change, although other calls to :func:`.rand_unif`
will generate different values:

>>> hl.eval(x) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(x)
0.5562065047992025

>>> hl.eval(x) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(x)
0.5562065047992025

>>> hl.eval(hl.rand_unif(0, 1)) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(hl.rand_unif(0, 1))
0.4678132874101748

>>> hl.eval(hl.rand_unif(0, 1)) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(hl.rand_unif(0, 1))
0.9097632224065403

>>> hl.eval(hl.array([x, x, x])) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(hl.array([x, x, x]))
[0.5562065047992025, 0.5562065047992025, 0.5562065047992025]

If the three values in the last expression should be distinct, three separate
Expand All @@ -39,15 +42,15 @@ calls to :func:`.rand_unif` should be made:
>>> a = hl.rand_unif(0, 1)
>>> b = hl.rand_unif(0, 1)
>>> c = hl.rand_unif(0, 1)
>>> hl.eval(hl.array([a, b, c])) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(hl.array([a, b, c]))
[0.8846327207915881, 0.14415148553468504, 0.8202677741734825]

Within the rows of a :class:`.Table`, the same expression will yield a
consistent value within each row, but different (random) values across rows:

>>> table = hl.utils.range_table(5, 1)
>>> table = table.annotate(x1=x, x2=x, rand=hl.rand_unif(0, 1))
>>> table.show() # doctest: +SKIP_OUTPUT_CHECK
>>> table.show()
+-------+-------------+-------------+-------------+
| idx | x1 | x2 | rand |
+-------+-------------+-------------+-------------+
Expand All @@ -69,33 +72,39 @@ All random functions can take a specified seed as an argument. This guarantees
that multiple invocations of the same function within the same context will
return the same result, e.g.

>>> hl.eval(hl.rand_unif(0, 1, seed=0)) # doctest: +SKIP_OUTPUT_CHECK
.. testsetup::
hl.reset_global_randomness()

>>> hl.eval(hl.rand_unif(0, 1, seed=0))
0.5488135008937808

>>> hl.eval(hl.rand_unif(0, 1, seed=0)) # doctest: +SKIP_OUTPUT_CHECK
>>> hl.eval(hl.rand_unif(0, 1, seed=0))
0.5488135008937808

This does not guarantee the same behavior across different contexts; e.g., the
rows may have different values if the expression is applied to different tables:

.. testsetup::
hl.reset_global_randomness()

>>> table = hl.utils.range_table(5, 1).annotate(x=hl.rand_unif(0, 1, seed=0))
>>> table.x.collect() # doctest: +SKIP_OUTPUT_CHECK
>>> table.x.collect()
[0.5488135008937808,
0.7151893652121089,
0.6027633824638369,
0.5448831893094143,
0.42365480398481625]

>>> table = hl.utils.range_table(5, 1).annotate(x=hl.rand_unif(0, 1, seed=0))
>>> table.x.collect() # doctest: +SKIP_OUTPUT_CHECK
>>> table.x.collect()
[0.5488135008937808,
0.7151893652121089,
0.6027633824638369,
0.5448831893094143,
0.42365480398481625]

>>> table = hl.utils.range_table(5, 5).annotate(x=hl.rand_unif(0, 1, seed=0))
>>> table.x.collect() # doctest: +SKIP_OUTPUT_CHECK
>>> table.x.collect()
[0.5488135008937808,
0.9595974306263271,
0.42205690070893265,
Expand All @@ -106,6 +115,9 @@ The seed can also be set globally using :func:`.set_global_seed`. This sets the
seed globally for all subsequent Hail operations, and a pipeline will be
guaranteed to have the same results if the global seed is set right beforehand:

.. testsetup::
hl.reset_global_randomness()

>>> hl.set_global_seed(0)
>>> hl.eval(hl.array([hl.rand_unif(0, 1), hl.rand_unif(0, 1)])) # doctest: +SKIP_OUTPUT_CHECK
[0.6830630912401323, 0.4035978197966855]
Expand Down
9 changes: 8 additions & 1 deletion hail/python/hail/expr/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,14 @@ def _func(name, ret_type, *args, type_args=()):


def _seeded_func(name, ret_type, seed, *args):
static_rng_uid = seed if seed is not None else Env.next_static_rng_uid()
if seed is None:
if not Env._hc._user_specified_rng_nonce:
warning('To ensure reproducible randomness across Hail sessions, '
'you must set the "global_seed" parameter in hl.init(), in '
'addition to the local seed in each random function.')
static_rng_uid = Env.next_static_rng_uid()
else:
static_rng_uid = -seed - 1
indices, aggregations = unify_all(*args)
rng_state = ir.Ref('__rng_state', trngstate)
return construct_expr(ir.ApplySeeded(name, static_rng_uid, rng_state, ret_type, *(a._ir for a in args)), ret_type, indices, aggregations)
Expand Down
2 changes: 1 addition & 1 deletion hail/python/hail/ir/base_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def handle_randomness(self, create_uids):
The uid may be an int64, or an arbitrary tuple of int64s. The only
requirement is that all stream elements contain distinct uid values.
"""
assert(self.is_stream())
assert(self.is_stream)
if (not create_uids and not self.uses_randomness) or self.has_uids:
return self
new = self._handle_randomness(create_uids)
Expand Down

0 comments on commit 466aa3c

Please sign in to comment.