From 799ec45f7822ce6fd99df335c915691f7cc6116b Mon Sep 17 00:00:00 2001 From: patrick-schultz Date: Fri, 21 Oct 2022 13:27:30 -0400 Subject: [PATCH] fixes --- hail/python/hail/conftest.py | 1 + hail/python/hail/docs/functions/random.rst | 87 ++++++++----------- hail/python/hail/docs/guides/agg.rst | 8 +- hail/python/hail/docs/scans.rst | 15 ++-- .../hail/experimental/full_outer_join_mt.py | 12 +-- .../hail/expr/aggregators/aggregators.py | 21 ++--- .../hail/expr/expressions/base_expression.py | 34 ++++---- hail/python/hail/expr/functions.py | 26 +++--- hail/python/hail/matrixtable.py | 4 +- hail/python/hail/methods/statgen.py | 10 +-- hail/python/test/hail/conftest.py | 7 +- hail/python/test/hail/expr/test_expr.py | 1 - hail/python/test/hail/helpers.py | 17 ++-- hail/python/test/hail/methods/test_statgen.py | 5 -- 14 files changed, 115 insertions(+), 133 deletions(-) diff --git a/hail/python/hail/conftest.py b/hail/python/hail/conftest.py index 386184378d9..302d358c680 100644 --- a/hail/python/hail/conftest.py +++ b/hail/python/hail/conftest.py @@ -35,6 +35,7 @@ def init(doctest_namespace): "docs")) hl.init(global_seed=0) + hl.reset_global_randomness() try: generate_datasets(doctest_namespace) diff --git a/hail/python/hail/docs/functions/random.rst b/hail/python/hail/docs/functions/random.rst index c3c2b4df453..7f61448dad9 100644 --- a/hail/python/hail/docs/functions/random.rst +++ b/hail/python/hail/docs/functions/random.rst @@ -13,28 +13,25 @@ Evaluating the same expression will yield the same value every time, but multipl calls of the same function will have different results. For example, let `x` be a random number generated with the function :func:`.rand_unif`: -.. 
testsetup:: - hl.reset_global_randomness() - >>> x = hl.rand_unif(0, 1) The value of `x` will not change, although other calls to :func:`.rand_unif` will generate different values: >>> hl.eval(x) - 0.7769696130603699 + 0.9828239225846387 >>> hl.eval(x) - 0.5562065047992025 + 0.9828239225846387 >>> hl.eval(hl.rand_unif(0, 1)) - 0.4678132874101748 + 0.49094525115847415 >>> hl.eval(hl.rand_unif(0, 1)) - 0.9097632224065403 + 0.3972543766997359 >>> hl.eval(hl.array([x, x, x])) - [0.5562065047992025, 0.5562065047992025, 0.5562065047992025] + [0.9828239225846387, 0.9828239225846387, 0.9828239225846387] If the three values in the last expression should be distinct, three separate calls to :func:`.rand_unif` should be made: @@ -43,7 +40,7 @@ calls to :func:`.rand_unif` should be made: >>> b = hl.rand_unif(0, 1) >>> c = hl.rand_unif(0, 1) >>> hl.eval(hl.array([a, b, c])) - [0.8846327207915881, 0.14415148553468504, 0.8202677741734825] + [0.992090957001768, 0.9564448098124774, 0.3905029525642664] Within the rows of a :class:`.Table`, the same expression will yield a consistent value within each row, but different (random) values across rows: @@ -51,17 +48,18 @@ consistent value within each row, but different (random) values across rows: >>> table = hl.utils.range_table(5, 1) >>> table = table.annotate(x1=x, x2=x, rand=hl.rand_unif(0, 1)) >>> table.show() - +-------+-------------+-------------+-------------+ - | idx | x1 | x2 | rand | - +-------+-------------+-------------+-------------+ - | int32 | float64 | float64 | float64 | - +-------+-------------+-------------+-------------+ - | 0 | 8.50369e-01 | 8.50369e-01 | 9.64129e-02 | - | 1 | 5.15437e-01 | 5.15437e-01 | 8.60843e-02 | - | 2 | 5.42493e-01 | 5.42493e-01 | 1.69816e-01 | - | 3 | 5.51289e-01 | 5.51289e-01 | 6.48706e-01 | - | 4 | 6.40977e-01 | 6.40977e-01 | 8.22508e-01 | - +-------+-------------+-------------+-------------+ + +-------+----------+----------+----------+ + | idx | x1 | x2 | rand | + 
+-------+----------+----------+----------+ + | int32 | float64 | float64 | float64 | + +-------+----------+----------+----------+ + | 0 | 4.68e-01 | 4.68e-01 | 6.36e-01 | + | 1 | 8.24e-01 | 8.24e-01 | 9.72e-01 | + | 2 | 7.33e-01 | 7.33e-01 | 1.43e-01 | + | 3 | 8.99e-01 | 8.99e-01 | 5.52e-01 | + | 4 | 4.03e-01 | 4.03e-01 | 3.50e-01 | + +-------+----------+----------+----------+ + The same is true of the rows, columns, and entries of a :class:`.MatrixTable`. @@ -72,44 +70,35 @@ All random functions can take a specified seed as an argument. This guarantees that multiple invocations of the same function within the same context will return the same result, e.g. -.. testsetup:: - hl.reset_global_randomness() - >>> hl.eval(hl.rand_unif(0, 1, seed=0)) - 0.5488135008937808 + 0.2664972565962568 >>> hl.eval(hl.rand_unif(0, 1, seed=0)) - 0.5488135008937808 - -This does not guarantee the same behavior across different contexts; e.g., the -rows may have different values if the expression is applied to different tables: - -.. 
testsetup:: - hl.reset_global_randomness() + 0.2664972565962568 - >>> table = hl.utils.range_table(5, 1).annotate(x=hl.rand_bool(0.5, seed=0)) + >>> table = hl.utils.range_table(5, 1).annotate(x=hl.rand_unif(0, 1, seed=0)) >>> table.x.collect() - [0.5488135008937808, - 0.7151893652121089, - 0.6027633824638369, - 0.5448831893094143, - 0.42365480398481625] + [0.5820244750020055, + 0.33150686392731943, + 0.20526631289173847, + 0.6964416913998893, + 0.6092952493383876] - >>> table = hl.utils.range_table(5, 1).annotate(x=hl.rand_bool(0.5, seed=0)) + >>> table = hl.utils.range_table(5, 1).annotate(x=hl.rand_unif(0, 1, seed=0)) >>> table.x.collect() - [0.5488135008937808, - 0.7151893652121089, - 0.6027633824638369, - 0.5448831893094143, - 0.42365480398481625] + [0.5820244750020055, + 0.33150686392731943, + 0.20526631289173847, + 0.6964416913998893, + 0.6092952493383876] - >>> table = hl.utils.range_table(5, 5).annotate(x=hl.rand_bool(0.5, seed=0)) + >>> table = hl.utils.range_table(5, 5).annotate(x=hl.rand_unif(0, 1, seed=0)) >>> table.x.collect() - [0.5488135008937808, - 0.9595974306263271, - 0.42205690070893265, - 0.828743805759555, - 0.6414977904324134] + [0.5820244750020055, + 0.33150686392731943, + 0.20526631289173847, + 0.6964416913998893, + 0.6092952493383876] The seed can also be set globally using :func:`.set_global_seed`. This sets the seed globally for all subsequent Hail operations, and a pipeline will be diff --git a/hail/python/hail/docs/guides/agg.rst b/hail/python/hail/docs/guides/agg.rst index 606bbeada45..58ff362375f 100644 --- a/hail/python/hail/docs/guides/agg.rst +++ b/hail/python/hail/docs/guides/agg.rst @@ -114,7 +114,7 @@ Multiple aggregations >>> mt.aggregate_cols(hl.struct( ... fraction_female=hl.agg.fraction(mt.pheno.is_female), ... 
case_ratio=hl.agg.count_where(mt.is_case) / hl.agg.count())) - Struct(fraction_female=0.48, case_ratio=1.0) + Struct(fraction_female=0.44, case_ratio=1.0) :**dependencies**: :meth:`.MatrixTable.aggregate_cols`, :func:`.aggregators.fraction`, :func:`.aggregators.count_where`, :class:`.StructExpression` @@ -129,7 +129,7 @@ One aggregation :**code**: >>> mt.aggregate_rows(hl.agg.mean(mt.qual)) - 544323.8915384616 + 140054.73333333334 :**dependencies**: :meth:`.MatrixTable.aggregate_rows`, :func:`.aggregators.mean` @@ -148,7 +148,7 @@ Multiple aggregations >>> mt.aggregate_rows( ... hl.struct(n_high_quality=hl.agg.count_where(mt.qual > 40), ... mean_qual=hl.agg.mean(mt.qual))) - Struct(n_high_quality=13, mean_qual=544323.8915384616) + Struct(n_high_quality=9, mean_qual=140054.73333333334) :**dependencies**: :meth:`.MatrixTable.aggregate_rows`, :func:`.aggregators.count_where`, :func:`.aggregators.mean`, :class:`.StructExpression` @@ -167,7 +167,7 @@ Aggregate Entry Values Into A Local Value >>> mt.aggregate_entries( ... hl.struct(global_gq_mean=hl.agg.mean(mt.GQ), ... 
call_rate=hl.agg.fraction(hl.is_defined(mt.GT)))) - Struct(global_gq_mean=64.01841473178543, call_rate=0.9607692307692308) + Struct(global_gq_mean=69.60514541387025, call_rate=0.9933333333333333) :**dependencies**: :meth:`.MatrixTable.aggregate_entries`, :func:`.aggregators.mean`, :func:`.aggregators.fraction`, :class:`.StructExpression` diff --git a/hail/python/hail/docs/scans.rst b/hail/python/hail/docs/scans.rst index 26993f59934..e8fd5f811bf 100644 --- a/hail/python/hail/docs/scans.rst +++ b/hail/python/hail/docs/scans.rst @@ -45,14 +45,13 @@ along the genome: +---------------+------------+-----------+---------------+ | locus | array | int64 | int64 | +---------------+------------+-----------+---------------+ - | 20:10579373 | ["C","T"] | 1 | 0 | - | 20:10579398 | ["C","T"] | 1 | 1 | - | 20:10633237 | ["G","A"] | 69 | 2 | - | 20:10636995 | ["C","T"] | 2 | 71 | - | 20:10639222 | ["G","A"] | 22 | 73 | - | 20:13763601 | ["A","G"] | 2 | 95 | - | 20:16223922 | ["T","C"] | 66 | 97 | - | 20:17479617 | ["G","A"] | 9 | 163 | + | 20:10627772 | ["C","T"] | 2 | 2 | + | 20:10633237 | ["G","A"] | 69 | 4 | + | 20:10636995 | ["C","T"] | 2 | 73 | + | 20:10639222 | ["G","A"] | 22 | 75 | + | 20:13763601 | ["A","G"] | 2 | 97 | + | 20:16223922 | ["T","C"] | 66 | 99 | + | 20:17479617 | ["G","A"] | 9 | 165 | +---------------+------------+-----------+---------------+ diff --git a/hail/python/hail/experimental/full_outer_join_mt.py b/hail/python/hail/experimental/full_outer_join_mt.py index 2d31df3ccc0..b5daea0c149 100644 --- a/hail/python/hail/experimental/full_outer_join_mt.py +++ b/hail/python/hail/experimental/full_outer_join_mt.py @@ -44,8 +44,8 @@ def full_outer_join_mt(left: hl.MatrixTable, right: hl.MatrixTable) -> hl.Matrix +---------------+------------+------+------+ | locus | array | call | call | +---------------+------------+------+------+ - | 1:3 | ["A","C"] | 1/1 | 1/1 | - | 1:4 | ["A","C"] | 0/1 | 1/1 | + | 1:3 | ["A","C"] | 0/0 | 0/1 | + | 1:4 | ["A","C"] | 1/1 | 0/1 | 
| 1:5 | ["A","C"] | 0/0 | 0/0 | +---------------+------------+------+------+ @@ -57,10 +57,10 @@ def full_outer_join_mt(left: hl.MatrixTable, right: hl.MatrixTable) -> hl.Matrix +---------------+------------+------+------+------+------+ | locus | array | call | call | call | call | +---------------+------------+------+------+------+------+ - | 1:1 | ["A","C"] | 0/0 | 0/0 | NA | NA | - | 1:2 | ["A","C"] | 1/1 | 0/0 | NA | NA | - | 1:3 | ["A","C"] | 0/1 | 0/0 | 1/1 | 1/1 | - | 1:4 | ["A","C"] | NA | NA | 0/1 | 1/1 | + | 1:1 | ["A","C"] | 0/1 | 0/1 | NA | NA | + | 1:2 | ["A","C"] | 0/0 | 1/1 | NA | NA | + | 1:3 | ["A","C"] | 0/0 | 0/0 | 0/0 | 0/1 | + | 1:4 | ["A","C"] | NA | NA | 1/1 | 0/1 | | 1:5 | ["A","C"] | NA | NA | 0/0 | 0/0 | +---------------+------------+------+------+------+------+ diff --git a/hail/python/hail/expr/aggregators/aggregators.py b/hail/python/hail/expr/aggregators/aggregators.py index 8429efa480d..8e8cf6d69e6 100644 --- a/hail/python/hail/expr/aggregators/aggregators.py +++ b/hail/python/hail/expr/aggregators/aggregators.py @@ -1139,16 +1139,16 @@ def inbreeding(expr, prior) -> StructExpression: +------------------+-----------+-------------+------------------+------------------+ | str | float64 | int64 | float64 | int64 | +------------------+-----------+-------------+------------------+------------------+ - | "C1046::HG02024" | 2.69e-01 | 8 | 6.63e+00 | 7 | - | "C1046::HG02025" | -4.62e-01 | 8 | 6.63e+00 | 6 | - | "C1046::HG02026" | -4.62e-01 | 8 | 6.63e+00 | 6 | - | "C1047::HG00731" | 2.69e-01 | 8 | 6.63e+00 | 7 | - | "C1047::HG00732" | 2.69e-01 | 8 | 6.63e+00 | 7 | - | "C1047::HG00733" | 2.69e-01 | 8 | 6.63e+00 | 7 | - | "C1048::HG02024" | -4.62e-01 | 8 | 6.63e+00 | 6 | - | "C1048::HG02025" | -4.62e-01 | 8 | 6.63e+00 | 6 | - | "C1048::HG02026" | -4.62e-01 | 8 | 6.63e+00 | 6 | - | "C1049::HG00731" | 2.69e-01 | 8 | 6.63e+00 | 7 | + | "C1046::HG02024" | 2.79e-01 | 9 | 7.61e+00 | 8 | + | "C1046::HG02025" | -4.41e-01 | 9 | 7.61e+00 | 7 | + | 
"C1046::HG02026" | -4.41e-01 | 9 | 7.61e+00 | 7 | + | "C1047::HG00731" | 2.79e-01 | 9 | 7.61e+00 | 8 | + | "C1047::HG00732" | 2.79e-01 | 9 | 7.61e+00 | 8 | + | "C1047::HG00733" | 2.79e-01 | 9 | 7.61e+00 | 8 | + | "C1048::HG02024" | -4.41e-01 | 9 | 7.61e+00 | 7 | + | "C1048::HG02025" | -4.41e-01 | 9 | 7.61e+00 | 7 | + | "C1048::HG02026" | -4.41e-01 | 9 | 7.61e+00 | 7 | + | "C1049::HG00731" | 2.79e-01 | 9 | 7.61e+00 | 8 | +------------------+-----------+-------------+------------------+------------------+ showing top 10 rows @@ -1219,6 +1219,7 @@ def call_stats(call, alleles) -> StructExpression: +---------------+--------------+---------------------+-------------+---------------------------+ | 20:10579373 | [199,1] | [9.95e-01,5.00e-03] | 200 | [99,0] | | 20:10579398 | [198,2] | [9.90e-01,1.00e-02] | 200 | [99,1] | + | 20:10627772 | [198,2] | [9.90e-01,1.00e-02] | 200 | [98,0] | 20:10633237 | [108,92] | [5.40e-01,4.60e-01] | 200 | [31,23] | | 20:10636995 | [198,2] | [9.90e-01,1.00e-02] | 200 | [98,0] | | 20:10639222 | [175,25] | [8.75e-01,1.25e-01] | 200 | [78,3] | diff --git a/hail/python/hail/expr/expressions/base_expression.py b/hail/python/hail/expr/expressions/base_expression.py index cac174dcf87..ac9a5bf32ac 100644 --- a/hail/python/hail/expr/expressions/base_expression.py +++ b/hail/python/hail/expr/expressions/base_expression.py @@ -972,21 +972,20 @@ def export(self, path, delimiter='\t', missing='NA', header=True): >>> with open('output/gt-no-header.tsv', 'r') as f: ... for line in f: ... print(line, end='') - 1:1 ["A","C"] 0/1 0/1 0/0 0/0 - 1:2 ["A","C"] 1/1 0/1 1/1 1/1 - 1:3 ["A","C"] 1/1 0/1 0/1 0/0 - 1:4 ["A","C"] 1/1 0/1 1/1 1/1 - + 1:1 ["A","C"] 1/1 1/1 0/1 0/1 + 1:2 ["A","C"] 1/1 1/1 0/0 1/1 + 1:3 ["A","C"] 0/0 0/0 0/1 0/0 + 1:4 ["A","C"] 1/1 0/1 1/1 0/1 >>> small_mt.pop.export('output/pops.tsv') >>> with open('output/pops.tsv', 'r') as f: ... for line in f: ... 
print(line, end='') sample_idx pop - 0 2 - 1 2 - 2 0 - 3 2 + 0 0 + 1 0 + 2 2 + 3 0 >>> small_mt.ancestral_af.export('output/ancestral_af.tsv') @@ -994,13 +993,12 @@ def export(self, path, delimiter='\t', missing='NA', header=True): ... for line in f: ... print(line, end='') locus alleles ancestral_af - 1:1 ["A","C"] 5.3905e-01 - 1:2 ["A","C"] 8.6768e-01 - 1:3 ["A","C"] 4.3765e-01 - 1:4 ["A","C"] 7.6300e-01 + 1:1 ["A","C"] 5.6562e-01 + 1:2 ["A","C"] 3.6521e-01 + 1:3 ["A","C"] 2.6421e-01 + 1:4 ["A","C"] 6.5715e-01 - >>> mt = small_mt >>> small_mt.bn.export('output/bn.tsv') >>> with open('output/bn.tsv', 'r') as f: ... for line in f: @@ -1024,10 +1022,10 @@ def export(self, path, delimiter='\t', missing='NA', header=True): ... for line in f: ... print(line, end='') locus alleles {"s":0,"family":"fam1"} {"s":1,"family":"fam1"} {"s":2,"family":"fam1"} {"s":3,"family":"fam1"} - 1:1 ["A","C"] 0/1 0/1 0/0 0/0 - 1:2 ["A","C"] 1/1 0/1 1/1 1/1 - 1:3 ["A","C"] 1/1 0/1 0/1 0/0 - 1:4 ["A","C"] 1/1 0/1 1/1 1/1 + 1:1 ["A","C"] 1/1 1/1 0/1 0/1 + 1:2 ["A","C"] 1/1 1/1 0/0 1/1 + 1:3 ["A","C"] 0/0 0/0 0/1 0/0 + 1:4 ["A","C"] 1/1 0/1 1/1 0/1 diff --git a/hail/python/hail/expr/functions.py b/hail/python/hail/expr/functions.py index b6bb3bfd251..8552473c765 100644 --- a/hail/python/hail/expr/functions.py +++ b/hail/python/hail/expr/functions.py @@ -2482,7 +2482,7 @@ def rand_norm(mean=0, sd=1, seed=None, size=None) -> Float64Expression: 0.347110923255205 >>> hl.eval(hl.rand_norm()) - -1.5943404792015596 + -0.9281375348070483 Parameters ---------- @@ -2517,7 +2517,7 @@ def rand_norm2d(mean=None, cov=None, seed=None) -> ArrayNumericExpression: [-1.3909495945443346, 1.2805588680053859] >>> hl.eval(hl.rand_norm2d()) - [-1.1559071720174392, -0.5130711271314501] + [0.289520302334123, -1.1108917435930954] Notes ----- @@ -2579,7 +2579,7 @@ def rand_pois(lamb, seed=None) -> Float64Expression: 4.0 >>> hl.eval(hl.rand_pois(1)) - 1.0 + 4.0 Parameters ---------- @@ -2608,10 +2608,10 @@ def 
rand_unif(lower=0.0, upper=1.0, seed=None, size=None) -> Float64Expression: 0.9828239225846387 >>> hl.eval(hl.rand_unif(0, 1)) - 0.25250989678083696 + 0.49094525115847415 >>> hl.eval(hl.rand_unif(0, 1)) - 0.25129917581034783 + 0.3972543766997359 Parameters ---------- @@ -2649,10 +2649,10 @@ def rand_int32(a, b=None, *, seed=None) -> Int32Expression: 9 >>> hl.eval(hl.rand_int32(10, 15)) - 13 + 14 >>> hl.eval(hl.rand_int32(10, 15)) - 10 + 12 Parameters ---------- @@ -2690,10 +2690,10 @@ def rand_int64(a=None, b=None, *, seed=None) -> Int64Expression: 9 >>> hl.eval(hl.rand_int64(1 << 33, 1 << 35)) - 27844105735 + 33089740109 >>> hl.eval(hl.rand_int64(1 << 33, 1 << 35)) - 27585770553 + 18195458570 Parameters ---------- @@ -2741,7 +2741,7 @@ def rand_beta(a, b, lower=None, upper=None, seed=None) -> Float64Expression: 0.30607924177641355 >>> hl.eval(hl.rand_beta(2, 5)) - 0.2600750641666824 + 0.1103872607301062 Parameters ---------- @@ -2784,7 +2784,7 @@ def rand_gamma(shape, scale, seed=None) -> Float64Expression: 3.115449479063202 >>> hl.eval(hl.rand_gamma(1, 1)) - 0.4119557259345351 + 3.077698059931638 Parameters ---------- @@ -2824,7 +2824,7 @@ def rand_cat(prob, seed=None) -> Int32Expression: 2 >>> hl.eval(hl.rand_cat([0, 1.7, 2])) - 1 + 2 Parameters ---------- @@ -2853,7 +2853,7 @@ def rand_dirichlet(a, seed=None) -> ArrayExpression: [0.6987619676833735, 0.287566556865261, 0.013671475451365567] >>> hl.eval(hl.rand_dirichlet([1, 1, 1])) - [0.007675161191324316, 0.7390117338502963, 0.25331310495837933] + [0.16299928555608242, 0.04393664153526524, 0.7930640729086523] Parameters ---------- diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index 36a1bc92709..895bbd6c0bb 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -1991,7 +1991,7 @@ def aggregate_rows(self, expr, _localize=True) -> Any: >>> dataset.aggregate_rows(hl.struct(n_high_quality=hl.agg.count_where(dataset.qual > 40), ... 
mean_qual=hl.agg.mean(dataset.qual))) - Struct(n_high_quality=8, mean_qual=157103.15875) + Struct(n_high_quality=9, mean_qual=140054.73333333334) Notes ----- @@ -2091,7 +2091,7 @@ def aggregate_entries(self, expr, _localize=True): >>> dataset.aggregate_entries(hl.struct(global_gq_mean=hl.agg.mean(dataset.GQ), ... call_rate=hl.agg.fraction(hl.is_defined(dataset.GT)))) - Struct(global_gq_mean=69.41687657430731, call_rate=0.9925) + Struct(global_gq_mean=69.60514541387025, call_rate=0.9933333333333333) Notes ----- diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index f440d7d7113..b0a49023d2e 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -2640,11 +2640,11 @@ def balding_nichols_model(n_populations: int, +---------------+------------+------+------+------+------+------+ | locus | array | call | call | call | call | call | +---------------+------------+------+------+------+------+------+ - | 1:1 | ["A","C"] | 0|0 | 0|0 | 0|1 | 0|0 | 0|0 | - | 1:2 | ["A","C"] | 0|1 | 1|1 | 0|1 | 1|1 | 0|1 | - | 1:3 | ["A","C"] | 1|1 | 1|0 | 0|0 | 0|1 | 0|0 | - | 1:4 | ["A","C"] | 1|1 | 1|0 | 1|1 | 1|1 | 1|1 | - | 1:5 | ["A","C"] | 1|1 | 1|1 | 1|1 | 1|1 | 1|1 | + | 1:1 | ["A","C"] | 0|0 | 0|0 | 0|1 | 0|1 | 1|0 | + | 1:2 | ["A","C"] | 1|1 | 0|1 | 0|0 | 0|0 | 0|1 | + | 1:3 | ["A","C"] | 0|0 | 0|0 | 1|0 | 1|0 | 0|0 | + | 1:4 | ["A","C"] | 1|1 | 1|1 | 1|0 | 0|1 | 0|1 | + | 1:5 | ["A","C"] | 1|1 | 0|1 | 0|1 | 1|0 | 1|1 | +---------------+------------+------+------+------+------+------+ showing top 5 rows showing the first 5 of 100 columns diff --git a/hail/python/test/hail/conftest.py b/hail/python/test/hail/conftest.py index ed8f1ebce44..b6c5ab84552 100644 --- a/hail/python/test/hail/conftest.py +++ b/hail/python/test/hail/conftest.py @@ -4,7 +4,7 @@ import pytest -from hail import current_backend, init +from hail import current_backend, init, reset_global_randomness from hail.backend.service_backend import 
ServiceBackend from .helpers import startTestHailContext, stopTestHailContext @@ -41,6 +41,11 @@ def init_hail(): stopTestHailContext() +@pytest.fixture(autouse=True) +def reset_randomness(init_hail): + reset_global_randomness() + + @pytest.fixture(autouse=True) def set_query_name(init_hail, request): backend = current_backend() diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 60cd8449d47..33c6a3a3417 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -2804,7 +2804,6 @@ def test_show_expression(self): +---------+ ''' - @fails_service_backend() def test_export_genetic_data(self): mt = hl.balding_nichols_model(1, 3, 3) mt = mt.key_cols_by(s = 's' + hl.str(mt.sample_idx)) diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index cdf6c4f0ebc..a0f448c3a02 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -7,22 +7,17 @@ from hail.utils.java import Env, choose_backend import hail as hl -_initialized = False - def startTestHailContext(): - global _initialized - if not _initialized: - backend_name = choose_backend() - if backend_name == 'spark': - hl.init(master='local[2]', min_block_size=0, quiet=True, global_seed=0) - else: - hl.init(global_seed=0) - _initialized = True + backend_name = choose_backend() + if backend_name == 'spark': + hl.init(master='local[2]', min_block_size=0, quiet=True, global_seed=0) + else: + hl.init(global_seed=0) def stopTestHailContext(): - pass + hl.stop() _test_dir = os.environ.get('HAIL_TEST_RESOURCES_DIR', '../src/test/resources') _doctest_dir = os.environ.get('HAIL_DOCTEST_DATA_DIR', 'hail/docs/data') diff --git a/hail/python/test/hail/methods/test_statgen.py b/hail/python/test/hail/methods/test_statgen.py index 4cc43500e8a..8b5ec815bbd 100644 --- a/hail/python/test/hail/methods/test_statgen.py +++ b/hail/python/test/hail/methods/test_statgen.py @@ -1274,10 +1274,8 @@ def
test_poisson_pass_through(self): assert mt.aggregate_rows(hl.agg.all(mt.foo.bar == ht[mt.row_key].bar)) - @fails_service_backend() def test_genetic_relatedness_matrix(self): n, m = 100, 200 - hl.reset_global_randomness() mt = hl.balding_nichols_model(3, n, m, fst=[.9, .9, .9], n_partitions=4) g = BlockMatrix.from_entry_expr(mt.GT.n_alt_alleles()).to_numpy().T @@ -1307,7 +1305,6 @@ def _filter_and_standardize_cols(a): col_filter = col_lengths > 0 return np.copy(a[:, np.squeeze(col_filter)] / col_lengths[col_filter]) - @fails_service_backend() def test_realized_relationship_matrix(self): n, m = 100, 200 hl.reset_global_randomness() @@ -1348,7 +1345,6 @@ def test_row_correlation_vs_hardcode(self): def test_row_correlation_vs_numpy(self): n, m = 11, 10 - hl.reset_global_randomness() mt = hl.balding_nichols_model(3, n, m, fst=[.9, .9, .9], n_partitions=2) mt = mt.annotate_rows(sd=agg.stats(mt.GT.n_alt_alleles()).stdev) mt = mt.filter_rows(mt.sd > 1e-30) @@ -1630,7 +1626,6 @@ def variance(expr): test_stat(40, 400, 20, 12) def test_balding_nichols_model_phased(self): - hl.reset_global_randomness() bn_ds = hl.balding_nichols_model(1, 5, 5, phased=True) assert bn_ds.aggregate_entries(hl.agg.all(bn_ds.GT.phased)) == True actual = bn_ds.GT.collect()