From 20fc42f36bbd339f83c30b00081c88e238968ee7 Mon Sep 17 00:00:00 2001 From: Dan King Date: Mon, 23 Jan 2023 12:49:49 -0500 Subject: [PATCH] [query] add hl.pgenchisq (#12605) * [query] add hl.pgenchisq CHANGELOG: Add `hl.pgenchisq`, the cumulative distribution function of the generalized chi-squared distribution. The [Generalized Chi-Squared Distribution](https://en.wikipedia.org/wiki/Generalized_chi-squared_distribution) arises from weighted sums of sums of squares of independent normally distributed variables and is used by `hl.skat` to generate p-values. The simplest formulation I know for it is this: w : R^n k : Z^n lam : R^n mu : R sigma : R x ~ N(mu, sigma^2) y_i ~ NonCentralChiSquared(k_i, lam_i) Z = x + w y^T = x + sum_i{ w_i y_i } Z ~ GeneralizedNonCentralChiSquared(w, k, lam, mu, sigma) The non-central chi-squared distribution arises from a sum of the squares of independent normally distributed variables with non-zero mean and unit variance. The non-centrality parameter, lambda, is defined as the sum of the squares of the means of each component normal random variable. Although the non-central chi-squared distribution has a closed form implementation (indeed, Hail implements this CDF: `hl.pchisqtail`), the generalized chi-squared distribution does not have a closed form. There are at least four distinct algorithms for evaluating the CDF. To my knowledge, the oldest one is by Robert Davies: Davies, Robert. "The distribution of a linear combination of chi-squared random variables." Applied Statistics 29 323-333. 1980. The [original publication](http://www.robertnz.net/pdf/lc_chisq.pdf) includes a Fortran implementation. Davies' [website](http://www.robertnz.net/QF.htm) also includes a C version. Hail includes a copy of the C version as `davies.cpp`. I suspect this code contains undefined behavior. Moreover, it is not supported on Apple M1 machines because we don't ship binaries for that platform. 
It seemed to me that the simplest solution is to port this algorithm to Scala. This PR is that port. I tested against the 39 test cases provided by Davies with the source code. I also added some doctests based on the CDF plots from Wikipedia. The same 39 test cases are tested in Scala and in Python. I am open to suggestions for the name. `pgenchisq` seems to strike a balance between clarity and brevity. I believe this is the first CDF which can fail to converge. I included some relevant debugging information. I think we should standardize on a schema, but I need more examples before I am certain of the right standard. I am open to critique of `GeneralizedChiSquaredDistribution.scala` but I will strongly argue against significant refactoring. I worry that we will subtly break this algorithm. I directly reached out to Robert Davies to clarify the licensing of this algorithm. It appears to have been released at least under both GPL2 and MIT by unaffiliated third parties (who, really, have no right to apply a license to it). Do not remove WIP until I resolve this. With this PR in place, `hl.skat` can be implemented entirely in Python. 
* clarify license --- hail/python/hail/docs/functions/stats.rst | 2 + hail/python/hail/expr/__init__.py | 3 +- hail/python/hail/expr/functions.py | 157 +++++ hail/python/test/hail/expr/test_functions.py | 29 + .../is/hail/expr/ir/functions/Functions.scala | 12 + .../expr/ir/functions/MathFunctions.scala | 50 +- .../GeneralizedChiSquaredDistribution.scala | 622 ++++++++++++++++++ .../main/scala/is/hail/stats/package.scala | 4 + .../main/scala/is/hail/utils/package.scala | 5 + .../test/resources/davies-genchisq-tests.tsv | 40 ++ ...neralizedChiSquaredDistributionSuite.scala | 622 ++++++++++++++++++ 11 files changed, 1544 insertions(+), 2 deletions(-) create mode 100644 hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala create mode 100644 hail/src/test/resources/davies-genchisq-tests.tsv create mode 100644 hail/src/test/scala/is/hail/stats/GeneralizedChiSquaredDistributionSuite.scala diff --git a/hail/python/hail/docs/functions/stats.rst b/hail/python/hail/docs/functions/stats.rst index 1b363a898d5..44f8e059b52 100644 --- a/hail/python/hail/docs/functions/stats.rst +++ b/hail/python/hail/docs/functions/stats.rst @@ -14,6 +14,7 @@ Statistical functions hardy_weinberg_test binom_test pchisqtail + pgenchisq pnorm pT pF @@ -32,6 +33,7 @@ Statistical functions .. autofunction:: hardy_weinberg_test .. autofunction:: binom_test .. autofunction:: pchisqtail +.. autofunction:: pgenchisq .. autofunction:: pnorm .. autofunction:: pT .. 
autofunction:: pF diff --git a/hail/python/hail/expr/__init__.py b/hail/python/hail/expr/__init__.py index 3a00ad061a5..10a035048e8 100644 --- a/hail/python/hail/expr/__init__.py +++ b/hail/python/hail/expr/__init__.py @@ -21,7 +21,7 @@ hardy_weinberg_test, parse_locus, parse_variant, variant_str, locus, locus_from_global_position, interval, locus_interval, parse_locus_interval, call, is_defined, is_missing, is_nan, is_finite, is_infinite, json, parse_json, log, log10, null, missing, or_else, coalesce, or_missing, - binom_test, pchisqtail, pl_dosage, pl_to_gp, pnorm, pT, pF, ppois, qchisqtail, qnorm, qpois, + binom_test, pchisqtail, pgenchisq, pl_dosage, pl_to_gp, pnorm, pT, pF, ppois, qchisqtail, qnorm, qpois, range, _stream_range, zeros, rand_bool, rand_norm, rand_norm2d, rand_pois, rand_unif, rand_int32, rand_int64, rand_beta, rand_gamma, rand_cat, rand_dirichlet, sqrt, corr, str, is_snp, is_mnp, is_transition, is_transversion, is_insertion, is_deletion, is_indel, is_star, is_complex, is_strand_ambiguous, @@ -124,6 +124,7 @@ 'or_missing', 'binom_test', 'pchisqtail', + 'pgenchisq', 'pl_dosage', 'pl_to_gp', 'pnorm', diff --git a/hail/python/hail/expr/functions.py b/hail/python/hail/expr/functions.py index ff773c9763c..3754fcce6c3 100644 --- a/hail/python/hail/expr/functions.py +++ b/hail/python/hail/expr/functions.py @@ -2046,6 +2046,7 @@ def pchisqtail(x, df, ncp=None, lower_tail=False, log_p=False) -> Float64Express Parameters ---------- x : float or :class:`.Expression` of type :py:data:`.tfloat64` + The value at which to evaluate the CDF. df : float or :class:`.Expression` of type :py:data:`.tfloat64` Degrees of freedom. 
ncp: float or :class:`.Expression` of type :py:data:`.tfloat64` @@ -2066,6 +2067,162 @@ def pchisqtail(x, df, ncp=None, lower_tail=False, log_p=False) -> Float64Express return _func("pnchisqtail", tfloat64, x, df, ncp, lower_tail, log_p) +PGENCHISQ_RETURN_TYPE = tstruct(value=tfloat64, n_iterations=tint32, converged=tbool, fault=tint32) + + +@typecheck(x=expr_float64, + w=expr_array(expr_float64), + k=expr_array(expr_int32), + lam=expr_array(expr_float64), + mu=expr_float64, + sigma=expr_float64, + max_iterations=nullable(expr_int32), + min_accuracy=nullable(expr_float64)) +def pgenchisq(x, w, k, lam, mu, sigma, *, max_iterations=None, min_accuracy=None) -> Float64Expression: + r"""The cumulative probability function of a `generalized chi-squared distribution + <https://en.wikipedia.org/wiki/Generalized_chi-squared_distribution>`__. + + The generalized chi-squared distribution has many interpretations. We share here four + interpretations of the values of this distribution: + + 1. A linear combination of normal variables and squares of normal variables. + + 2. A weighted sum of sums of squares of normally distributed values plus a normally distributed + value. + + 3. A weighted sum of chi-squared distributed values plus a normally distributed value. + + 4. A `"quadratic form" <https://en.wikipedia.org/wiki/Quadratic_form_(statistics)>`__ in a vector + of uncorrelated `standard normal + <https://en.wikipedia.org/wiki/Normal_distribution#Standard_normal_distribution>`__ values. + + The parameters of this function correspond to the parameters of the third interpretation. + + .. math:: + + \begin{aligned} + w &: R^n \quad k : Z^n \quad lam : R^n \quad mu : R \quad sigma : R \\ + \\ + x &\sim N(mu, sigma^2) \\ + y_i &\sim \mathrm{NonCentralChiSquared}(k_i, lam_i) \\ + \\ + Z &= x + w y^T \\ + &= x + \sum_i w_i y_i \\ + Z &\sim \mathrm{GeneralizedNonCentralChiSquared}(w, k, lam, mu, sigma) + \end{aligned} + + The generalized chi-squared distribution often arises when working on linear models with standard + normal noise because the sum of the squares of the residuals should follow a generalized + chi-squared distribution. 
+ + Examples + -------- + + The following plot shows three examples of the generalized chi-squared cumulative distribution + function. + + .. image:: https://upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Generalized_chi-square_cumulative_distribution_function.svg/1280px-Generalized_chi-square_cumulative_distribution_function.svg.png + :alt: Plots of examples of the generalized chi-square cumulative distribution function. Created by Dvidby0. + :target: https://commons.wikimedia.org/wiki/File:Generalized_chi-square_cumulative_distribution_function.svg + :width: 640px + + The following examples are chosen from the three instances shown above. The curves appear in the + same order as the legend of the plot: blue, red, yellow. + + >>> hl.eval(hl.pgenchisq(-80, w=[1, 2], k=[1, 4], lam=[1, 1], mu=0, sigma=0).value) + 0.0 + >>> hl.eval(hl.pgenchisq(-20, w=[1, 2], k=[1, 4], lam=[1, 1], mu=0, sigma=0).value) + 0.0 + >>> hl.eval(hl.pgenchisq(10 , w=[1, 2], k=[1, 4], lam=[1, 1], mu=0, sigma=0).value) + 0.4670012373599629 + >>> hl.eval(hl.pgenchisq(40 , w=[1, 2], k=[1, 4], lam=[1, 1], mu=0, sigma=0).value) + 0.9958803111156718 + + >>> hl.eval(hl.pgenchisq(-80, w=[-2, -1], k=[5, 2], lam=[3, 1], mu=-3, sigma=0).value) + 9.227056966837344e-05 + >>> hl.eval(hl.pgenchisq(-20, w=[-2, -1], k=[5, 2], lam=[3, 1], mu=-3, sigma=0).value) + 0.516439358616939 + >>> hl.eval(hl.pgenchisq(10 , w=[-2, -1], k=[5, 2], lam=[3, 1], mu=-3, sigma=0).value) + 1.0 + >>> hl.eval(hl.pgenchisq(40 , w=[-2, -1], k=[5, 2], lam=[3, 1], mu=-3, sigma=0).value) + 1.0 + + >>> hl.eval(hl.pgenchisq(-80, w=[1, -10, 2], k=[1, 2, 3], lam=[2, 3, 7], mu=-10, sigma=0).value) + 0.14284718767288906 + >>> hl.eval(hl.pgenchisq(-20, w=[1, -10, 2], k=[1, 2, 3], lam=[2, 3, 7], mu=-10, sigma=0).value) + 0.5950150356303258 + >>> hl.eval(hl.pgenchisq(10 , w=[1, -10, 2], k=[1, 2, 3], lam=[2, 3, 7], mu=-10, sigma=0).value) + 0.923219534175858 + >>> hl.eval(hl.pgenchisq(40 , w=[1, -10, 2], k=[1, 2, 3], lam=[2, 3, 7], mu=-10, 
sigma=0).value) + 0.9971746768781656 + + Notes + ----- + + We follow Wikipedia's notational conventions. Some texts refer to the weight vector (our `w`) as + :math:`\lambda` or `lb` and the non-centrality vector (our `lam`) as `nc`. + + We use Davies' algorithm which was published as: `Davies, Robert. "The distribution of a + linear combination of chi-squared random variables." Applied Statistics 29 + 323-333. 1980. <http://www.robertnz.net/pdf/lc_chisq.pdf>`__ Davies included Fortran source code + in the original publication. Davies also released a `C language port + <http://www.robertnz.net/QF.htm>`__. Hail's implementation is a fairly direct port of the C + implementation to Scala. Davies provides 39 test cases with the source code. The Hail tests + include all 39 test cases as well as a few additional tests. + + Davies' website cautions: + + The method works well in most situations if you want only modest accuracy, say 0.0001. But + problems may arise if the sum is dominated by one or two terms with a total of only one or + two degrees of freedom and x is small. + + Parameters + ---------- + x : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64` + The value at which to evaluate the cumulative distribution function (CDF). + w : :obj:`list` of :obj:`float` or :class:`.Expression` of type :py:class:`.tarray` of :py:data:`.tfloat64` + A weight for each non-central chi-square term. + k : :obj:`list` of :obj:`int` or :class:`.Expression` of type :py:class:`.tarray` of :py:data:`.tint32` + A degrees of freedom parameter for each non-central chi-square term. + lam : :obj:`list` of :obj:`float` or :class:`.Expression` of type :py:class:`.tarray` of :py:data:`.tfloat64` + A non-centrality parameter for each non-central chi-square term. We use `lam` instead + of `lambda` because the latter is a reserved word in Python. + mu : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64` + The mean of the normal term. 
+ sigma : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64` + The standard deviation of the normal term. + max_iterations : :obj:`int` or :class:`.Expression` of type :py:data:`.tint32` + The maximum number of iterations of the numerical integration before giving up. Defaults to 10,000. + min_accuracy : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64` + The minimum accuracy of the returned value. Defaults to 0.00001. If the minimum accuracy is not achieved, the + `converged` field of the returned structure is false. + + Returns + ------- + :class:`.StructExpression` + This method returns a structure with the value as well as information about the numerical + integration. + + - value : :class:`.Float64Expression`. If converged is true, the value of the CDF evaluated + at `x`. Otherwise, this is the last value the integration evaluated before aborting. + + - n_iterations : :class:`.Int32Expression`. The number of iterations before stopping. + + - converged : :class:`.BooleanExpression`. True if the `min_accuracy` was achieved and round + off error is not likely significant. + + - fault : :class:`.Int32Expression`. If converged is true, fault is zero. If converged is + false, fault is either one or two. One indicates that the required accuracy was not + achieved. Two indicates the round-off error is possibly significant. 
+ + """ + if max_iterations is None: + max_iterations = hl.literal(10_000) + if min_accuracy is None: + min_accuracy = hl.literal(0.00001) + return _func("pgenchisq", PGENCHISQ_RETURN_TYPE, x - mu, w, k, lam, sigma, max_iterations, min_accuracy) + + @typecheck(x=expr_float64, mu=expr_float64, sigma=expr_float64, lower_tail=expr_bool, log_p=expr_bool) def pnorm(x, mu=0, sigma=1, lower_tail=True, log_p=False) -> Float64Expression: """The cumulative probability function of a normal distribution with mean diff --git a/hail/python/test/hail/expr/test_functions.py b/hail/python/test/hail/expr/test_functions.py index 8ba77e7ef0b..e31c57bc305 100644 --- a/hail/python/test/hail/expr/test_functions.py +++ b/hail/python/test/hail/expr/test_functions.py @@ -1,6 +1,7 @@ import hail as hl import scipy.stats as spst import pytest +from ..helpers import resource def test_deprecated_binom_test(): @@ -35,3 +36,31 @@ def right_tail_from_scipy(x, df, ncp): def test_shuffle(): assert set(hl.eval(hl.shuffle(hl.range(5)))) == set(range(5)) + + +def test_pgenchisq(): + ht = hl.import_table( + resource('davies-genchisq-tests.tsv'), + types={ + 'c': hl.tfloat64, + 'weights': hl.tarray(hl.tfloat64), + 'k': hl.tarray(hl.tint32), + 'lam': hl.tarray(hl.tfloat64), + 'sigma': hl.tfloat64, + 'lim': hl.tint32, + 'acc': hl.tfloat64, + 'expected': hl.tfloat64, + 'expected_n_iterations': hl.tint32 + } + ) + ht = ht.add_index('line_number') + ht = ht.annotate(line_number = ht.line_number + 1) + ht = ht.annotate(genchisq_result = hl.pgenchisq( + ht.c, ht.weights, ht.k, ht.lam, 0.0, ht.sigma, max_iterations=ht.lim, min_accuracy=ht.acc + )) + tests = ht.collect() + for test in tests: + assert abs(test.genchisq_result.value - test.expected) < 0.0000005, str(test) + assert test.genchisq_result.fault == 0, str(test) + assert test.genchisq_result.converged == True, str(test) + assert test.genchisq_result.n_iterations == test.expected_n_iterations, str(test) diff --git 
a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index 9c7e93cd716..c2697f1d802 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -581,6 +581,18 @@ abstract class RegistryFunctions { case (r, cb, _, rt, Array(a1, a2, a3, a4, a5), errorID) => impl(r, cb, rt, a1, a2, a3, a4, a5, errorID) } + def registerSCode6(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, rt: Type, pt: (Type, SType, SType, SType, SType, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, SType, SValue, SValue, SValue, SValue, SValue, SValue, Value[Int]) => SValue): Unit = + registerSCode(name, Array(mt1, mt2, mt3, mt4, mt5, mt6), rt, unwrappedApply(pt)) { + case (r, cb, _, rt, Array(a1, a2, a3, a4, a5, a6), errorID) => impl(r, cb, rt, a1, a2, a3, a4, a5, a6, errorID) + } + + def registerSCode7(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, mt7: Type, rt: Type, pt: (Type, SType, SType, SType, SType, SType, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, SType, SValue, SValue, SValue, SValue, SValue, SValue, SValue, Value[Int]) => SValue): Unit = + registerSCode(name, Array(mt1, mt2, mt3, mt4, mt5, mt6, mt7), rt, unwrappedApply(pt)) { + case (r, cb, _, rt, Array(a1, a2, a3, a4, a5, a6, a7), errorID) => impl(r, cb, rt, a1, a2, a3, a4, a5, a6, a7, errorID) + } + def registerCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitCodeBuilder, EmitRegion, SType, SValue) => Value[_]): Unit = registerCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (r, cb, rt, _, Array(a1)) => impl(cb, r, rt, a1) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala index aa541ed0c4d..03f242f94b9 100644 --- 
a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala @@ -4,9 +4,10 @@ import is.hail.asm4s.Code import is.hail.expr.ir._ import is.hail.stats._ import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete._ import is.hail.types.physical.stypes.interfaces.primitive import is.hail.types.physical.stypes.primitives._ -import is.hail.types.physical.{PBoolean, PFloat32, PFloat64, PInt32, PInt64, PType} +import is.hail.types.physical.{PCanonicalArray, PBoolean, PFloat32, PFloat64, PInt32, PInt64, PType} import is.hail.types.virtual._ import is.hail.utils._ import org.apache.commons.math3.special.Gamma @@ -186,6 +187,53 @@ object MathFunctions extends RegistryFunctions { registerScalaFunction("qnchisqtail", Array(TFloat64, TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "qnchisqtail") registerScalaFunction("qnchisqtail", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "qnchisqtail") + registerSCode7( + "pgenchisq", + TFloat64, + TArray(TFloat64), + TArray(TInt32), + TArray(TFloat64), + TFloat64, + TInt32, + TFloat64, + DaviesAlgorithm.pType.virtualType, + (_, _, _, _, _, _, _, _) => DaviesAlgorithm.pType.sType + ) { + case (r, cb, rt, + x: SFloat64Value, + _w: SIndexablePointerValue, + _k: SIndexablePointerValue, + _lam: SIndexablePointerValue, + sigma: SFloat64Value, + maxIterations: SInt32Value, + minAccuracy: SFloat64Value, + _) => + + val w = _w.castToArray(cb) + val k = _k.castToArray(cb) + val lam = _lam.castToArray(cb) + + val res = cb.newLocal[DaviesResultForPython]("pgenchisq_result", + Code.invokeScalaObject7[ + Double, IndexedSeq[Double], IndexedSeq[Int], IndexedSeq[Double], Double, Int, Double, DaviesResultForPython + ](statsPackageClass, "pgenchisq", + x.value, + Code.checkcast[IndexedSeq[Double]](svalueToJavaValue(cb, r.region, w)), + Code.checkcast[IndexedSeq[Int]](svalueToJavaValue(cb, r.region, 
k)), + Code.checkcast[IndexedSeq[Double]](svalueToJavaValue(cb, r.region, lam)), + sigma.value, + maxIterations.value, + minAccuracy.value) + ) + + DaviesAlgorithm.pType.constructFromFields(cb, r.region, FastIndexedSeq( + EmitValue.present(primitive(cb.memoize(res.invoke[Double]("value")))), + EmitValue.present(primitive(cb.memoize(res.invoke[Int]("nIterations")))), + EmitValue.present(primitive(cb.memoize(res.invoke[Boolean]("converged")))), + EmitValue.present(primitive(cb.memoize(res.invoke[Int]("fault")))) + ), deepCopy = false) + } + registerScalaFunction("floor", Array(TFloat32), TFloat32, null)(thisClass, "floor") registerScalaFunction("floor", Array(TFloat64), TFloat64, null)(thisClass, "floor") diff --git a/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala b/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala new file mode 100644 index 00000000000..ee889e4b740 --- /dev/null +++ b/hail/src/main/scala/is/hail/stats/GeneralizedChiSquaredDistribution.scala @@ -0,0 +1,622 @@ +package is.hail.stats + +import is.hail.utils._ +import is.hail.types.physical._ + +case class DaviesAlgorithmTrace( + var absoluteSum: Double, + var totalNumberOfIntegrationTerms: Int, + var numberOfIntegrations: Int, + var integrationIntervalInFinalIntegration: Double, + var truncationPointInInitialIntegration: Double, + var standardDeviationOfInitialConvergenceFactor: Double, + var cyclesToLocateIntegrationParameters: Int +) + +class DaviesResultForPython( + val value: Double, + val nIterations: Int, + val converged: Boolean, + val fault: Int +) + +object DaviesAlgorithm { + private val pi = 3.14159265358979 + private val log28 = 0.0866 + private val divisForFindu = Array[Double](2.0, 1.4, 1.2, 1.1) + private val rats = Array[Int](1, 2, 4, 8); + + val pType = PCanonicalStruct( + "value" -> PFloat64(required = true), + "n_iterations" -> PInt32(required = true), + "converged" -> PBoolean(required = true), + "fault" -> PInt32(required = true) + 
) +} + +class DaviesAlgorithm( + private[this] val c: Double, + private[this] val n: Array[Int], + private[this] val lb: Array[Double], + private[this] val nc: Array[Double], + private[this] val lim: Int, + private[this] val sigma: Double +) { + /** + * This algorithm is a direct port of Robert Davies' algorithm described in + * + * Davies, Robert. "The distribution of a linear combination of chi-squared + * random variables." Applied Statistics 29 323-333. 1980. + * + * The Fortran code was published with the aforementioned paper. A port to C is available on + * Davies' website http://www.robertnz.net/download.html . At the time of retrieval (2023-01-15), + * the code lacks a description of its license. On 2023-01-18 0304 ET I received personal e-mail + * correspondence from Robert Davies indicating: + * + * Assume it has the MIT license. That is on my todo list to say the MIT license applies to + * all the software on the website unless specified otherwise. + * + **/ + import GeneralizedChiSquaredDistribution._ + import DaviesAlgorithm._ + + private[this] val r: Int = lb.length + private[this] var count: Int = 0 + private[this] var ndtsrt: Boolean = true // "need to sort" + private[this] var fail: Boolean = true + private[this] var th: Array[Int] = new Array[Int](r) + private[this] var intl: Double = 0.0 + private[this] var ersm: Double = 0.0 + private[this] var sigsq: Double = square(sigma) + private[this] var lmax: Double = 0.0 + private[this] var lmin: Double = 0.0 + private[this] var mean: Double = 0.0 + + private class DaviesException() extends RuntimeException {} + + private[this] def counter(): Unit = { + count += 1 + if (count > lim) { + throw new DaviesException() + } + } + + def order(): Unit = { // "sort the th array", th appears to be a list of indices into lb + var j = 0 + while (j < r) { + val lj = Math.abs(lb(j)) + var k = j - 1 + var break: Boolean = false + while (k >= 0 && !break) { + if (lj > Math.abs(lb(th(k)))) { + th(k + 1) = th(k) + k -= 1 
+ } else { + break = true + } + } + if (!break) { + assert(k == -1) + } + th(k + 1) = j + j += 1 + } + ndtsrt = false + } + + def errbd(_u: Double): (Double, Double) = { + var u = _u + counter() + var xconst = u * sigsq + var sum1 = u * xconst + u = 2.0 * u + var j = r - 1 + while (j >= 0) { + val nj = n(j) + val lj = lb(j) + val ncj = nc(j) + val x = u * lj + val y = 1.0 - x + xconst = xconst + lj * (ncj / y + nj) / y + sum1 = (sum1 + + ncj * square(x / y) + + nj * (square(x) / y + log1(-x, false)) + ) + + j -= 1 + } + + (exp1(-0.5 * sum1), xconst) + } + + def ctff(accx: Double, _u2: Double): (Double, Double) = { + var u2 = _u2 + var u1 = 0.0 + var c1 = mean + val rb = 2.0 * (if (u2 > 0.0) { lmax } else { lmin }) + + var u = u2 / (1.0 + u2 * rb) + + val errc2 = errbd(u) + var err = errc2._1 + var c2 = errc2._2 + while (err > accx) { + u1 = u2 + c1 = c2 + u2 = 2.0 * u2 + + u = u2 / (1.0 + u2 * rb) + + val errc2 = errbd(u) + err = errc2._1 + c2 = errc2._2 + } + + u = (c1 - mean) / (c2 - mean) + while (u < 0.9) { + u = (u1 + u2) / 2.0 + + val errxconst = errbd(u / (1.0 + u * rb)) + err = errxconst._1 + val xconst = errxconst._2 + if (err > accx) { + u1 = u + c1 = xconst + } else { + u2 = u + c2 = xconst + } + + u = (c1 - mean) / (c2 - mean) + } + + (c2, u2) + } + + def truncation(_u: Double, _tausq: Double): Double = { + counter() + var u = _u + var tausq = _tausq + + var sum1 = 0.0 + var prod2 = 0.0 + var prod3 = 0.0 + var s = 0 + var sum2 = (sigsq + tausq) * square(u) + var prod1 = 2.0 * sum2 + u = 2.0 * u + + var j = 0 + while (j < r) { + val lj = lb(j) + val ncj = nc(j) + val nj = n(j) + + val x = square(u * lj) + sum1 = sum1 + ncj * x / (1.0 + x) + if (x > 1.0) { + prod2 = prod2 + nj * Math.log(x) + prod3 = prod3 + nj * log1(x, true) + s = s + nj + } else { + prod1 = prod1 + nj * log1(x, true) + } + + j += 1 + } + + sum1 = 0.5 * sum1 + prod2 = prod1 + prod2 + prod3 = prod1 + prod3 + var x = exp1(-sum1 - 0.25 * prod2) / pi + val y = exp1(-sum1 - 0.25 * prod3) / 
pi + + var err1 = if (s == 0) { + 1.0 + } else { + x * 2.0 / s + } + + var err2 = if (prod3 > 1.0) { + 2.5 * y + } else { + 1.0 + } + + if (err2 < err1) { + err1 = err2 + } + + x = 0.5 * sum2 + + if (x <= y) { + err2 = 1.0 + } else { + err2 = y / x + } + + if (err1 < err2) { + err1 + } else { + err2 + } + } + + def findu(_ut: Double, accx: Double): Double = { + var ut = _ut + var u = ut / 4.0 + if (truncation(u, 0.0) > accx) { + u = ut + while (truncation(u, 0.0) > accx) { + ut = ut * 4.0 + u = ut + } + } else { + ut = u + u = u / 4.0 + while (truncation(u, 0.0) <= accx) { + ut = u + u = u / 4.0 + } + } + var i = 0 + while (i < 4) { + u = ut / divisForFindu(i) + + if ( truncation(u, 0.0) <= accx ) { + ut = u + } + + i += 1 + } + ut + } + + def integrate(nterm: Int, interv: Double, tausq: Double, mainx: Boolean): Unit = { + val inpi = interv / pi + + var k = nterm + while (k >= 0) { + val u = (k + 0.5) * interv + var sum1 = - 2.0 * u * c + var sum2 = Math.abs(sum1) + var sum3 = - 0.5 * sigsq * square(u) + + var j = r - 1 + while (j >= 0) { + val nj = n(j) + val x = 2.0 * lb(j) * u + var y = square(x) + + sum3 = sum3 - 0.25 * nj * log1(y, true) + y = nc(j) * x / (1.0 + y) + val z = nj * Math.atan(x) + y + sum1 = sum1 + z + sum2 = sum2 + Math.abs(z) + sum3 = sum3 - 0.5 * x * y + + j -= 1 + } + + var x = inpi * exp1(sum3) / u + if (!mainx) { + x = x * (1.0 - exp1(-0.5 * tausq * square(u))) + } + sum1 = Math.sin(0.5 * sum1) * x + sum2 = 0.5 * sum2 * x + intl = intl + sum1 + ersm = ersm + sum2 + + k -= 1 + } + } + + + def cfe(x: Double): Double = { + counter(); + if (ndtsrt) { + order() + } + var axl = Math.abs(x) + val sxl = if (x > 0.0) { 1.0 } else { -1.0 } + var sum1 = 0.0; + var j = r - 1 + var break = false + while (j >= 0 && !break) { + val t = th(j); + if (lb(t) * sxl > 0.0) { + val lj = Math.abs(lb(t)); + val axl1 = axl - lj * (n(t) + nc(t)) + val axl2 = lj / log28 + if (axl1 > axl2) { + axl = axl1 + } else { + if (axl > axl2) { + axl = axl2 + } + sum1 = (axl - 
axl1) / lj + + var k = j - 1 + while (k >= 0) { + sum1 = sum1 + (n(th(k)) + nc(th(k))) + k -= 1 + } + break = true + } + } + + j -= 1 + } + + if (sum1 > 100.0) { + fail = true // FIXME: we can return the fail parameter instead + 1.0 + } else { + Math.pow(2.0, (sum1 / 4.0)) / (pi * square(axl)) + } + } + + def cdf(acc: Double): (Double, DaviesAlgorithmTrace, Int) = { + var acc1 = acc + val trace = DaviesAlgorithmTrace(0.0, 0, 0, 0.0, 0.0, 0.0, 0) + var ifault = 0 + var qfval = -1.0 + try { + ndtsrt = true + fail = false + var xlim = lim.toDouble + + /* find mean, sd, max and min of lb, check that parameter values are valid */ + var sd = sigsq + + var j = 0 + while (j < r) { + val nj = n(j) + val lj = lb(j) + val ncj = nc(j) + if (nj < 0) { + throw new HailException(s"Degrees of freedom parameters must all be positive, ${j}'th parameter is ${nj}.") + } + if (ncj < 0.0) { + throw new HailException(s"Non-centrality parameters must all be positive, ${j}'th parameter is ${ncj}.") + } + sd = sd + square(lj) * (2 * nj + 4.0 * ncj) + mean = mean + lj * (nj + ncj) + if (lmax < lj) { + lmax = lj + } else if (lmin > lj) { + lmin = lj + } + + j += 1 + } + + if (sd == 0.0) { + if (c > 0.0) { + qfval = 1.0 + } else { + qfval = 0.0 + } + throw new DaviesException() + } + + if (lmin == 0.0 && lmax == 0.0 && sigma == 0.0) { + val lbStr = lb.mkString("(", ",", ")") + throw new HailException(s"Either weights vector must be non-zero or sigma must be non-zero, found: ${lbStr} and ${sigma}.") + } + + sd = Math.sqrt(sd) + + val almx = if (lmax < -lmin) { + -lmin + } else { + lmax + } + + /* starting values for findu, ctff */ + var utx = 16.0 / sd + var up = 4.5 / sd + var un = -up + /* truncation point with no convergence factor */ + utx = findu(utx, .5 * acc1) + /* does convergence factor help */ + if (c != 0.0 && (almx > 0.07 * sd)) { + // FIXME: return the fail parameter + val tausq = .25 * acc1 / cfe(c) + if (fail) { + fail = false + } else if (truncation(utx, tausq) < .2 * acc1) { + 
sigsq = sigsq + tausq + utx = findu(utx, .25 * acc1) + trace.standardDeviationOfInitialConvergenceFactor = Math.sqrt(tausq) + } + } + trace.truncationPointInInitialIntegration = utx + acc1 = 0.5 * acc1 + + /* find RANGE of distribution, quit if outside this */ + var intv = 0.0 + var xnt = 0.0 + var stopL1 = false + while (!stopL1) { + val (c2, u2) = ctff(acc1, up) + up = u2 + val d1 = c2 - c + + if (d1 < 0.0) { + qfval = 1.0 + throw new DaviesException() + } + val (_c2, _u2) = ctff(acc1, un) + un = _u2 + val d2 = c - _c2 + if (d2 < 0.0) { + qfval = 0.0 + throw new DaviesException() + } + /* find integration interval */ + val divisor = if (d1 > d2) { d1 } else { d2 } + intv = 2.0 * pi / divisor + /* calculate number of terms required for main and + auxillary integrations */ + xnt = utx / intv + val xntm = 3.0 / Math.sqrt(acc1) + if (xnt > xntm * 1.5) { + /* parameters for auxillary integration */ + if (xntm > xlim) { + ifault = 1 + throw new DaviesException() + } + val ntm = Math.floor(xntm + 0.5).toInt + val intv1 = utx / ntm + val x = 2.0 * pi / intv1 + if (x <= Math.abs(c)) { + stopL1 = true + } else { + /* calculate convergence factor */ + val tausq = .33 * acc1 / (1.1 * (cfe(c - x) + cfe(c + x))) + if (fail) { + stopL1 = true + } else { + acc1 = .67 * acc1 + /* auxillary integration */ + integrate(ntm, intv1, tausq, false) + xlim = xlim - xntm + sigsq = sigsq + tausq + trace.numberOfIntegrations += 1 + trace.totalNumberOfIntegrationTerms += ntm + 1 + /* find truncation point with new convergence factor */ + utx = findu(utx, .25 * acc1) + acc1 = 0.75 * acc1 + } + } + } else { + stopL1 = true + } + } + + /* main integration */ + trace.integrationIntervalInFinalIntegration = intv + if (xnt > xlim) { + ifault = 1 + throw new DaviesException() + } + val nt = Math.floor(xnt + 0.5).toInt; + integrate(nt, intv, 0.0, true); + trace.numberOfIntegrations += 1 + trace.totalNumberOfIntegrationTerms += nt + 1 + qfval = 0.5 - intl + trace.absoluteSum = ersm + + /* test 
whether round-off error could be significant + allow for radix 8 or 16 machines */ + up = ersm + val x = up + acc / 10.0 + j = 0 + while (j < 4) { + if (rats(j) * x == rats(j) * up) { + ifault = 2 + } + + j += 1 + } + } catch { + case _: DaviesException => + } + + trace.cyclesToLocateIntegrationParameters = count + (qfval, trace, ifault) + } +} + +object GeneralizedChiSquaredDistribution { /* small numeric helpers plus two entry points that run DaviesAlgorithm */ + def exp1(x: Double): Double = { /* Math.exp(x), except that any x below -50.0 gives exactly 0.0 */ + if (x < -50.0) { + 0.0 + } else { + Math.exp(x) + } + } + + def square(x: Double): Double = x * x + + def cube(x: Double): Double = x * x * x + + def log1(x: Double, first: Boolean): Double = { /* first = true: log(1+x); first = false: log(1+x) - x */ + if (Math.abs(x) > 0.1) { + if (first) { + Math.log(1.0 + x) + } else { + Math.log(1.0 + x) - x + } + } else { /* |x| at most 0.1: accumulate series terms until the partial sum stops changing */ + val y = x / (2.0 + x) + var term = 2.0 * cube(y) + var k = 3.0 + var s = if (first) { + 2.0 * y + } else { + -x * y + } + val yy = square(y) + var s1 = s + term / k + while (s1 != s) { + k = k + 2.0 + term = term * yy + s = s1 + s1 = s + term / k + } + s + } + } + + def cdf( + c: Double, + n: Array[Int], + lb: Array[Double], + nc: Array[Double], + sigma: Double, + lim: Int, + acc: Double + ): Double = { /* returns only the computed value; a fault code of 1 or 2 is turned into a RuntimeException */ + assert(n.length == lb.length) + assert(lb.length == nc.length) + assert(lim >= 0) + assert(acc >= 0) + + val (value, trace, fault) = new DaviesAlgorithm(c, n, lb, nc, lim, sigma).cdf(acc) /* trace is not read in this method */ + + assert(fault >= 0 && fault <= 2, fault) + + if (fault == 1) { + throw new RuntimeException(s"Required accuracy ($acc) not achieved. Best value found was: $value.") + } + + if (fault == 2) { + throw new RuntimeException(s"Round-off error is possibly significant.
Best value found was: $value.") + } + + value + } + + def cdfReturnExceptions( + c: Double, + n: Array[Int], + lb: Array[Double], + nc: Array[Double], + sigma: Double, + lim: Int, + acc: Double + ): DaviesResultForPython = { /* same checks as cdf, but the fault code is returned in the result instead of being thrown */ + assert(n.length == lb.length) + assert(lb.length == nc.length) + assert(lim >= 0) + assert(acc >= 0) + + val (value, trace, fault) = new DaviesAlgorithm(c, n, lb, nc, lim, sigma).cdf(acc) + + assert(fault >= 0 && fault <= 2, fault) + + new DaviesResultForPython(value, trace.numberOfIntegrations, fault == 0, fault) /* third argument is true only when fault is 0 */ + } +} diff --git a/hail/src/main/scala/is/hail/stats/package.scala b/hail/src/main/scala/is/hail/stats/package.scala index aa86d2360b7..44c769421ce 100644 --- a/hail/src/main/scala/is/hail/stats/package.scala +++ b/hail/src/main/scala/is/hail/stats/package.scala @@ -367,6 +367,10 @@ package object stats { def qnchisqtail(p: Double, df: Double, ncp: Double): Double = qnchisqtail(p, df, ncp, lowerTail = false, logP = false) + def pgenchisq(x: Double, w: IndexedSeq[Double], k: IndexedSeq[Int], lam: IndexedSeq[Double], sigma: Double, lim: Int, acc: Double): DaviesResultForPython = { /* w, k, lam are forwarded as lb, n, nc respectively */ + GeneralizedChiSquaredDistribution.cdfReturnExceptions(x, k.toArray, w.toArray, lam.toArray, sigma, lim, acc) + } + def dbeta(x: Double, a: Double, b: Double): Double = Beta.density(x, a, b, false) def dpois(x: Double, lambda: Double, logP: Boolean): Double = new Poisson(lambda).density(x, logP) diff --git a/hail/src/main/scala/is/hail/utils/package.scala b/hail/src/main/scala/is/hail/utils/package.scala index 3ef4b3c1988..10a45004988 100644 --- a/hail/src/main/scala/is/hail/utils/package.scala +++ b/hail/src/main/scala/is/hail/utils/package.scala @@ -842,6 +842,11 @@ package object utils extends Logging f(s, arg1, arg2, arg3, arg4, arg5, arg6) } + def unwrappedApply[U, T](f: (U, T, T, T, T, T, T, T) => T): (U, Seq[T]) => T = if (f == null) null else { (s, ts) => /* seven-argument overload; a null f yields null */ + val Seq(arg1, arg2, arg3, arg4, arg5, arg6, arg7) = ts + f(s, arg1, arg2, arg3, arg4, arg5,
arg6, arg7) + } + def drainInputStreamToOutputStream( is: InputStream, os: OutputStream diff --git a/hail/src/test/resources/davies-genchisq-tests.tsv b/hail/src/test/resources/davies-genchisq-tests.tsv new file mode 100644 index 00000000000..eef96ff6b71 --- /dev/null +++ b/hail/src/test/resources/davies-genchisq-tests.tsv @@ -0,0 +1,40 @@ +c weights k lam sigma lim acc expected expected_n_iterations +1.000000 [6.0,3.0,1.0] [1,1,1] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.054213 2 +7.000000 [6.0,3.0,1.0] [1,1,1] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.493555 2 +20.000000 [6.0,3.0,1.0] [1,1,1] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.876027 1 +2.000000 [6.0,3.0,1.0] [2,2,2] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.006435 1 +20.000000 [6.0,3.0,1.0] [2,2,2] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.600208 1 +60.000000 [6.0,3.0,1.0] [2,2,2] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.983897 1 +10.000000 [6.0,3.0,1.0] [6,4,2] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.002697 1 +50.000000 [6.0,3.0,1.0] [6,4,2] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.564753 1 +120.000000 [6.0,3.0,1.0] [6,4,2] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.991229 1 +10.000000 [6.0,3.0,1.0] [2,4,6] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.033357 1 +30.000000 [6.0,3.0,1.0] [2,4,6] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.580446 1 +80.000000 [6.0,3.0,1.0] [2,4,6] [0.0,0.0,0.0] 0.000000 1000 0.000100 0.991283 1 +20.000000 [7.0,3.0] [6,2] [6.0,2.0] 0.000000 1000 0.000100 0.006125 1 +100.000000 [7.0,3.0] [6,2] [6.0,2.0] 0.000000 1000 0.000100 0.591339 1 +200.000000 [7.0,3.0] [6,2] [6.0,2.0] 0.000000 1000 0.000100 0.977914 1 +10.000000 [7.0,3.0] [1,1] [6.0,2.0] 0.000000 1000 0.000100 0.045126 2 +60.000000 [7.0,3.0] [1,1] [6.0,2.0] 0.000000 1000 0.000100 0.592431 1 +150.000000 [7.0,3.0] [1,1] [6.0,2.0] 0.000000 1000 0.000100 0.977648 1 +45.000000 [6.0,3.0,1.0,12.0,6.0,2.0] [6,4,2,2,4,6] [0.0,0.0,0.0,0.0,0.0,0.0] 0.000000 1000 0.000100 0.010950 1 +120.000000 [6.0,3.0,1.0,12.0,6.0,2.0] [6,4,2,2,4,6] [0.0,0.0,0.0,0.0,0.0,0.0] 
0.000000 1000 0.000100 0.654735 1 +210.000000 [6.0,3.0,1.0,12.0,6.0,2.0] [6,4,2,2,4,6] [0.0,0.0,0.0,0.0,0.0,0.0] 0.000000 1000 0.000100 0.984606 1 +70.000000 [7.0,3.0,7.0,3.0] [6,2,1,1] [6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.043679 1 +160.000000 [7.0,3.0,7.0,3.0] [6,2,1,1] [6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.584765 1 +260.000000 [7.0,3.0,7.0,3.0] [6,2,1,1] [6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.953774 1 +-40.000000 [7.0,3.0,-7.0,-3.0] [6,2,1,1] [6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.078208 1 +40.000000 [7.0,3.0,-7.0,-3.0] [6,2,1,1] [6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.522108 1 +140.000000 [7.0,3.0,-7.0,-3.0] [6,2,1,1] [6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.960370 1 +120.000000 [6.0,3.0,1.0,6.0,3.0,1.0,7.0,3.0,7.0,3.0] [6,4,2,2,4,6,6,2,1,1] [0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.015844 1 +240.000000 [6.0,3.0,1.0,6.0,3.0,1.0,7.0,3.0,7.0,3.0] [6,4,2,2,4,6,6,2,1,1] [0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.573625 1 +400.000000 [6.0,3.0,1.0,6.0,3.0,1.0,7.0,3.0,7.0,3.0] [6,4,2,2,4,6,6,2,1,1] [0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,6.0,2.0] 0.000000 1000 0.000100 0.988332 1 +5.000000 [30.0,1.0] [1,10] [0.0,0.0] 0.000000 1000 0.000100 0.015392 1 +25.000000 [30.0,1.0] [1,10] [0.0,0.0] 0.000000 1000 0.000100 0.510819 1 +100.000000 [30.0,1.0] [1,10] [0.0,0.0] 0.000000 1000 0.000100 0.916340 1 +10.000000 [30.0,1.0] [1,20] [0.0,0.0] 0.000000 1000 0.000100 0.004925 1 +40.000000 [30.0,1.0] [1,20] [0.0,0.0] 0.000000 1000 0.000100 0.573251 1 +100.000000 [30.0,1.0] [1,20] [0.0,0.0] 0.000000 1000 0.000100 0.896501 1 +20.000000 [30.0,1.0] [1,30] [0.0,0.0] 0.000000 1000 0.000100 0.017101 1 +50.000000 [30.0,1.0] [1,30] [0.0,0.0] 0.000000 1000 0.000100 0.566488 1 +100.000000 [30.0,1.0] [1,30] [0.0,0.0] 0.000000 1000 0.000100 0.871323 1 diff --git a/hail/src/test/scala/is/hail/stats/GeneralizedChiSquaredDistributionSuite.scala b/hail/src/test/scala/is/hail/stats/GeneralizedChiSquaredDistributionSuite.scala new file 
mode 100644
index 00000000000..2390f08e04f
--- /dev/null
+++ b/hail/src/test/scala/is/hail/stats/GeneralizedChiSquaredDistributionSuite.scala
@@ -0,0 +1,234 @@
+package is.hail.stats
+
+import is.hail.HailSuite
+import org.testng.annotations.Test
+
+
+/**
+ * Runs DaviesAlgorithm on 39 fixed inputs and compares the value, the trace and the fault code
+ * with recorded reference numbers. Every case uses sigma = 0.0, lim = 1000 and acc = 0.0001.
+ */
+class GeneralizedChiSquaredDistributionSuite extends HailSuite {
+  /** Forwards to DaviesAlgorithm, whose constructor takes lim before sigma. */
+  private[this] def pgenchisq(
+    c: Double,
+    n: Array[Int],
+    lb: Array[Double],
+    nc: Array[Double],
+    sigma: Double,
+    lim: Int,
+    acc: Double
+  ) = {
+    new DaviesAlgorithm(c, n, lb, nc, lim, sigma).cdf(acc)
+  }
+
+  /** True when a and b differ by less than tolerance. */
+  private[this] def nearEqual(a: Double, b: Double, tolerance: Double = 0.0000005): Boolean = {
+    /* Davies only reports 6 significant figures */
+    Math.abs(a - b) < tolerance
+  }
+
+  /**
+   * Field-by-field comparison of an actual trace (x) with an expected trace (y).
+   *
+   * The expected traces in this suite are written with at most five decimal places, so the
+   * fields are compared with a tolerance of 1e-5 rather than the tighter default of nearEqual.
+   * Whole-number fields therefore only pass when they are identical.
+   */
+  private[this] def nearEqualDAT(x: DaviesAlgorithmTrace, y: DaviesAlgorithmTrace): Boolean = {
+    val tolerance = 0.00001
+    val DaviesAlgorithmTrace(a, b, c, d, e, f, g) = x
+    /* destructure y here: binding x a second time would compare the trace with itself */
+    val DaviesAlgorithmTrace(a2, b2, c2, d2, e2, f2, g2) = y
+    (nearEqual(a, a2, tolerance) &&
+      nearEqual(b, b2, tolerance) &&
+      nearEqual(c, c2, tolerance) &&
+      nearEqual(d, d2, tolerance) &&
+      nearEqual(e, e2, tolerance) &&
+      nearEqual(f, f2, tolerance) &&
+      nearEqual(g, g2, tolerance))
+  }
+
+  /**
+   * Shared body of every test: evaluates the CDF at c with sigma = 0.0, lim = 1000 and
+   * acc = 0.0001, then asserts on the value, the trace and the fault code.
+   */
+  private[this] def check(
+    c: Double,
+    n: Array[Int],
+    lb: Array[Double],
+    nc: Array[Double],
+    expectedValue: Double,
+    expectedTrace: DaviesAlgorithmTrace
+  ): Unit = {
+    val (actualValue, actualTrace, actualFault) = pgenchisq(c, n, lb, nc, 0.0, 1000, 0.0001)
+    assert(nearEqual(actualValue, expectedValue), s"value: $actualValue vs $expectedValue")
+    assert(nearEqualDAT(actualTrace, expectedTrace), s"trace: $actualTrace vs $expectedTrace")
+    assert(actualFault == 0)
+  }
+
+  @Test def test0(): Unit = check(
+    1.0, Array(1, 1, 1), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.054213, DaviesAlgorithmTrace(0.76235, 744, 2, 0.03819, 53.37969, 0.0, 51))
+
+  @Test def test1(): Unit = check(
+    7.0, Array(1, 1, 1), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.493555, DaviesAlgorithmTrace(1.57018, 625, 2, 0.03964, 34.66214, 0.04784, 51))
+
+  @Test def test2(): Unit = check(
+    20.0, Array(1, 1, 1), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.876027, DaviesAlgorithmTrace(3.16244, 346, 1, 0.04602, 15.88681, 0.14159, 32))
+
+  @Test def test3(): Unit = check(
+    2.0, Array(2, 2, 2), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.006435, DaviesAlgorithmTrace(0.84764, 74, 1, 0.03514, 2.55311, 0.0, 22))
+
+  @Test def test4(): Unit = check(
+    20.0, Array(2, 2, 2), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.600208, DaviesAlgorithmTrace(1.74138, 66, 1, 0.03907, 2.55311, 0.0, 22))
+
+  @Test def test5(): Unit = check(
+    60.0, Array(2, 2, 2), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.983897, DaviesAlgorithmTrace(3.72757, 50, 1, 0.052, 2.55311, 0.0, 22))
+
+  @Test def test6(): Unit = check(
+    10.0, Array(6, 4, 2), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.002697, DaviesAlgorithmTrace(1.20122, 18, 1, 0.02706, 0.46096, 0.0, 20))
+
+  @Test def test7(): Unit = check(
+    50.0, Array(6, 4, 2), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.564753, DaviesAlgorithmTrace(2.06868, 15, 1, 0.03269, 0.46096, 0.0, 20))
+
+  @Test def test8(): Unit = check(
+    120.0, Array(6, 4, 2), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.991229, DaviesAlgorithmTrace(3.58496, 10, 1, 0.05141, 0.46096, 0.0, 20))
+
+  @Test def test9(): Unit = check(
+    10.0, Array(2, 4, 6), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.033357, DaviesAlgorithmTrace(1.29976, 27, 1, 0.03459, 0.88302, 0.0, 19))
+
+  @Test def test10(): Unit = check(
+    30.0, Array(2, 4, 6), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.580446, DaviesAlgorithmTrace(2.01747, 24, 1, 0.03887, 0.88302, 0.0, 19))
+
+  @Test def test11(): Unit = check(
+    80.0, Array(2, 4, 6), Array(6.0, 3.0, 1.0), Array(0.0, 0.0, 0.0),
+    0.991283, DaviesAlgorithmTrace(3.81157, 17, 1, 0.05628, 0.88302, 0.0, 19))
+
+  @Test def test12(): Unit = check(
+    20.0, Array(6, 2), Array(7.0, 3.0), Array(6.0, 2.0),
+    0.006125, DaviesAlgorithmTrace(1.16271, 16, 1, 0.01561, 0.24013, 0.0, 19))
+
+  @Test def test13(): Unit = check(
+    100.0, Array(6, 2), Array(7.0, 3.0), Array(6.0, 2.0),
+    0.591339, DaviesAlgorithmTrace(2.02277, 13, 1, 0.01949, 0.24013, 0.0, 19))
+
+  @Test def test14(): Unit = check(
+    200.0, Array(6, 2), Array(7.0, 3.0), Array(6.0, 2.0),
+    0.977914, DaviesAlgorithmTrace(3.09687, 10, 1, 0.02825, 0.24013, 0.0, 19))
+
+  @Test def test15(): Unit = check(
+    10.0, Array(1, 1), Array(7.0, 3.0), Array(6.0, 2.0),
+    0.045126, DaviesAlgorithmTrace(0.8712, 603, 2, 0.01628, 13.86318, 0.0, 49))
+
+  @Test def test16(): Unit = check(
+    60.0, Array(1, 1), Array(7.0, 3.0), Array(6.0, 2.0),
+    0.592431, DaviesAlgorithmTrace(1.69157, 340, 1, 0.02043, 6.93159, 0.24644, 31))
+
+  @Test def test17(): Unit = check(
+    150.0, Array(1, 1), Array(7.0, 3.0), Array(6.0, 2.0),
+    0.977648, DaviesAlgorithmTrace(3.06625, 87, 1, 0.02888, 2.47557, 0.81533, 29))
+
+  @Test def test18(): Unit = check(
+    45.0, Array(6, 4, 2, 2, 4, 6), Array(6.0, 3.0, 1.0, 12.0, 6.0, 2.0), Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
+    0.01095, DaviesAlgorithmTrace(1.82147, 13, 1, 0.01582, 0.193, 0.0, 18))
+
+  @Test def test19(): Unit = check(
+    120.0, Array(6, 4, 2, 2, 4, 6), Array(6.0, 3.0, 1.0, 12.0, 6.0, 2.0), Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
+    0.654735, DaviesAlgorithmTrace(2.73768, 11, 1, 0.0195, 0.193, 0.0, 18))
+
+  @Test def test20(): Unit = check(
+    210.0, Array(6, 4, 2, 2, 4, 6), Array(6.0, 3.0, 1.0, 12.0, 6.0, 2.0), Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
+    0.984606, DaviesAlgorithmTrace(3.83651, 8, 1, 0.02707, 0.193, 0.0, 18))
+
+  @Test def test21(): Unit = check(
+    70.0, Array(6, 2, 1, 1), Array(7.0, 3.0, 7.0, 3.0), Array(6.0, 2.0, 6.0, 2.0),
+    0.043679, DaviesAlgorithmTrace(1.65876, 10, 1, 0.01346, 0.12785, 0.0, 18))
+
+  @Test def test22(): Unit = check(
+    160.0, Array(6, 2, 1, 1), Array(7.0, 3.0, 7.0, 3.0), Array(6.0, 2.0, 6.0, 2.0),
+    0.584765, DaviesAlgorithmTrace(2.34799, 9, 1, 0.01668, 0.12785, 0.0, 18))
+
+  @Test def test23(): Unit = check(
+    260.0, Array(6, 2, 1, 1), Array(7.0, 3.0, 7.0, 3.0), Array(6.0, 2.0, 6.0, 2.0),
+    0.953774, DaviesAlgorithmTrace(3.11236, 7, 1, 0.02271, 0.12785, 0.0, 18))
+
+  @Test def test24(): Unit = check(
+    -40.0, Array(6, 2, 1, 1), Array(7.0, 3.0, -7.0, -3.0), Array(6.0, 2.0, 6.0, 2.0),
+    0.078208, DaviesAlgorithmTrace(1.42913, 10, 1, 0.01483, 0.12785, 0.0, 19))
+
+  @Test def test25(): Unit = check(
+    40.0, Array(6, 2, 1, 1), Array(7.0, 3.0, -7.0, -3.0), Array(6.0, 2.0, 6.0, 2.0),
+    0.522108, DaviesAlgorithmTrace(1.42909, 8, 1, 0.01771, 0.12785, 0.0, 19))
+
+  @Test def test26(): Unit = check(
+    140.0, Array(6, 2, 1, 1), Array(7.0, 3.0, -7.0, -3.0), Array(6.0, 2.0, 6.0, 2.0),
+    0.96037, DaviesAlgorithmTrace(2.19476, 10, 1, 0.01381, 0.12785, 0.0, 19))
+
+  @Test def test27(): Unit = check(
+    120.0,
+    Array(6, 4, 2, 2, 4, 6, 6, 2, 1, 1),
+    Array(6.0, 3.0, 1.0, 6.0, 3.0, 1.0, 7.0, 3.0, 7.0, 3.0),
+    Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 2.0, 6.0, 2.0),
+    0.015844, DaviesAlgorithmTrace(2.33438, 9, 1, 0.01202, 0.09616, 0.0, 18))
+
+  @Test def test28(): Unit = check(
+    240.0,
+    Array(6, 4, 2, 2, 4, 6, 6, 2, 1, 1),
+    Array(6.0, 3.0, 1.0, 6.0, 3.0, 1.0, 7.0, 3.0, 7.0, 3.0),
+    Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 2.0, 6.0, 2.0),
+    0.573625, DaviesAlgorithmTrace(3.1401, 7, 1, 0.01561, 0.09616, 0.0, 18))
+
+  @Test def test29(): Unit = check(
+    400.0,
+    Array(6, 4, 2, 2, 4, 6, 6, 2, 1, 1),
+    Array(6.0, 3.0, 1.0, 6.0, 3.0, 1.0, 7.0, 3.0, 7.0, 3.0),
+    Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 2.0, 6.0, 2.0),
+    0.988332, DaviesAlgorithmTrace(4.2142, 6, 1, 0.01812, 0.09616, 0.0, 18))
+
+  @Test def test30(): Unit = check(
+    5.0, Array(1, 10), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.015392, DaviesAlgorithmTrace(0.95892, 163, 1, 0.00841, 1.3638, 0.0, 22))
+
+  @Test def test31(): Unit = check(
+    25.0, Array(1, 10), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.510819, DaviesAlgorithmTrace(1.72922, 159, 1, 0.00864, 1.3638, 0.0, 22))
+
+  @Test def test32(): Unit = check(
+    100.0, Array(1, 10), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.91634, DaviesAlgorithmTrace(4.61788, 143, 1, 0.00963, 1.3638, 0.0, 22))
+
+  @Test def test33(): Unit = check(
+    10.0, Array(1, 20), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.004925, DaviesAlgorithmTrace(1.26245, 97, 1, 0.00839, 0.80736, 0.0, 21))
+
+  @Test def test34(): Unit = check(
+    40.0, Array(1, 20), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.573251, DaviesAlgorithmTrace(2.16513, 93, 1, 0.00874, 0.80736, 0.0, 21))
+
+  @Test def test35(): Unit = check(
+    100.0, Array(1, 20), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.896501, DaviesAlgorithmTrace(3.97055, 86, 1, 0.00954, 0.80736, 0.0, 21))
+
+  @Test def test36(): Unit = check(
+    20.0, Array(1, 30), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.017101, DaviesAlgorithmTrace(1.65684, 81, 1, 0.00843, 0.67453, 0.0, 20))
+
+  @Test def test37(): Unit = check(
+    50.0, Array(1, 30), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.566488, DaviesAlgorithmTrace(2.44382, 78, 1, 0.00878, 0.67453, 0.0, 20))
+
+  @Test def test38(): Unit = check(
+    100.0, Array(1, 30), Array(30.0, 1.0), Array(0.0, 0.0),
+    0.871323, DaviesAlgorithmTrace(3.75545, 72, 1, 0.00944, 0.67453, 0.0, 20))
+}