From b6165f7798686ed38caac0e5f7b530ff5b69df47 Mon Sep 17 00:00:00 2001
From: ces42
Date: Fri, 28 Jun 2019 00:15:08 +0200
Subject: [PATCH 1/9] Use Cholesky decomposition for correlated_values

---
 uncertainties/core.py | 86 +++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 20 deletions(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index cfba907d..46a857a9 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -157,29 +157,75 @@ def correlated_values(nom_values, covariance_mat, tags=None):
 
     # !!! It would in principle be possible to handle 0 variance
     # variables by first selecting the sub-matrix that does not contain
-    # such variables (with the help of numpy.ix_()), and creating
+    # such variables (with the help of numpy.ix_()), and creating
     # them separately.
-
-    std_devs = numpy.sqrt(numpy.diag(covariance_mat))
-
-    # For numerical stability reasons, we go through the correlation
-    # matrix, because it is insensitive to any change of scale in the
-    # quantities returned. However, care must be taken with 0 variance
-    # variables: calculating the correlation matrix cannot be simply done
-    # by dividing by standard deviations. We thus use specific
-    # normalization values, with no null value:
-    norm_vector = std_devs.copy()
-    norm_vector[norm_vector==0] = 1
-
-    return correlated_values_norm(
-        # !! The following zip() is a bit suboptimal: correlated_values()
-        # separates back the nominal values and the standard deviations:
-        zip(nom_values, std_devs),
-        covariance_mat/norm_vector/norm_vector[:,numpy.newaxis],
-        tags)
+
+    try:
+        L = numpy.cholesky(covariance_mat)
+        if tags is None:
+            tags = (None, ) * len(nom_values)
+        variables = [Variable(0, 1, tag) for tag in tags]
+        return nom_values + numpy.dot(L, variables)
+    except numpy.LinAlgError:
+        std_devs = numpy.sqrt(numpy.diag(covariance_mat))
+
+        # For numerical stability reasons, we go through the correlation
+        # matrix, because it is insensitive to any change of scale in the
+        # quantities returned. However, care must be taken with 0 variance
+        # variables: calculating the correlation matrix cannot be simply done
+        # by dividing by standard deviations. We thus use specific
+        # normalization values, with no null value:
+        norm_vector = std_devs.copy()
+        norm_vector[norm_vector==0] = 1
+
+        return correlated_values_norm(
+            # !! The following zip() is a bit suboptimal: correlated_values()
+            # separates back the nominal values and the standard deviations:
+            zip(nom_values, std_devs),
+            covariance_mat/norm_vector/norm_vector[:,numpy.newaxis],
+            tags)
 
 __all__.append('correlated_values')
 
+
+def ldl(A):
+    """
+    Return the LDL factorisation of a symmetric, positive semidefinite
+    matrix. If the matrix is not square, symmetric or positive
+    semi-definite, an error is raised.
+
+    A -- a square symmetric positive semi-definite matrix
+    """
+    EPS = 1.49e-8 # square root of eps
+
+    n, n_ = numpy.shape(A)
+    if n != n_:
+        raise numpy.linalg.LinAlgError('matrix must be square')
+
+    A = numpy.array(A, copy=True)
+    L = numpy.zeros_like(A) # we will only write in the lower half of L
+    D = numpy.zeros(n)
+
+    for i in range(n):
+        L[i, i] = 1
+
+        a = A[i, i]
+        l = A[i+1:, i]
+        if a < -EPS or (a <= 0 and abs(l).max() >= EPS):
+            raise numpy.linalg.LinAlgError('matrix must be positive '
+                'semidefinite (failed on %s-th diagonal entry)' % i)
+
+        if a <= 0:
+            D[i] = 0
+            continue
+        else:
+            D[i] = a
+            L[i+1:, i] = l / a
+            A[i+1:, i+1:] -= numpy.outer(l, l) / a
+
+    return L, D
+
+
+
 def correlated_values_norm(values_with_std_dev, correlation_mat,
                            tags=None):
     '''
@@ -239,7 +285,7 @@ def correlated_values_norm(values_with_std_dev, correlation_mat,
     # The coordinates of each new uncertainty as a function of the
     # new variables must include the variable scale (standard deviation):
     transform *= std_devs[:, numpy.newaxis]
-
+
     # Representation of the initial correlated values:
     values_funcs = tuple(
         AffineScalarFunc(

From e539c595a677280186959eca3272c0f219dfba0e Mon Sep 17 00:00:00 2001
From: ces42
Date: Tue, 30 Jul 2019 17:58:18 +0200
Subject: [PATCH 2/9] use ldl decomposition for the degenerate case

---
 uncertainties/core.py | 39 ++++++++++++---------------------------
 1 file changed, 12 insertions(+), 27 deletions(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index 46a857a9..764f515c 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -155,35 +155,20 @@ def correlated_values(nom_values, covariance_mat, tags=None):
     independent variable.
     """
 
-    # !!! It would in principle be possible to handle 0 variance
-    # variables by first selecting the sub-matrix that does not contain
-    # such variables (with the help of numpy.ix_()), and creating
-    # them separately.
-
+    # We perform a Cholesky decomposition of the covariance matrix.
+    # If the matrix is only positive semidefinite numpy will refuse to
+    # perform a Cholesky decomposition so we 'manually' do an LDL
+    # decomposition
     try:
         L = numpy.cholesky(covariance_mat)
-        if tags is None:
-            tags = (None, ) * len(nom_values)
-        variables = [Variable(0, 1, tag) for tag in tags]
-        return nom_values + numpy.dot(L, variables)
     except numpy.LinAlgError:
-        std_devs = numpy.sqrt(numpy.diag(covariance_mat))
-
-        # For numerical stability reasons, we go through the correlation
-        # matrix, because it is insensitive to any change of scale in the
-        # quantities returned. However, care must be taken with 0 variance
-        # variables: calculating the correlation matrix cannot be simply done
-        # by dividing by standard deviations. We thus use specific
-        # normalization values, with no null value:
-        norm_vector = std_devs.copy()
-        norm_vector[norm_vector==0] = 1
-
-        return correlated_values_norm(
-            # !! The following zip() is a bit suboptimal: correlated_values()
-            # separates back the nominal values and the standard deviations:
-            zip(nom_values, std_devs),
-            covariance_mat/norm_vector/norm_vector[:,numpy.newaxis],
-            tags)
+        L0, D = ldl(covariance_mat)
+        L = numpy.dot(L0, sqrt(D))
+
+    if tags is None:
+        tags = (None, ) * len(nom_values)
+    variables = [Variable(0, 1, tag) for tag in tags]
+    return nom_values + numpy.dot(L, variables)
 
 __all__.append('correlated_values')
 
@@ -195,7 +180,7 @@ def ldl(A):
 
     A -- a square symmetric positive semi-definite matrix
     """
-    EPS = 1.49e-8 # square root of eps
+    EPS = 1.49e-8 # square root of float64 accuracy
 
     n, n_ = numpy.shape(A)
     if n != n_:

From d4b9cb7033ee523d7374eca4e86c8b5b7c03f901 Mon Sep 17 00:00:00 2001
From: ces42
Date: Tue, 30 Jul 2019 18:20:02 +0200
Subject: [PATCH 3/9] make this actually work

---
 uncertainties/core.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index 764f515c..7a557d02 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -160,10 +160,10 @@ def correlated_values(nom_values, covariance_mat, tags=None):
     # perform a Cholesky decomposition so we 'manually' do an LDL
     # decomposition
     try:
-        L = numpy.cholesky(covariance_mat)
-    except numpy.LinAlgError:
+        L = numpy.linalg.cholesky(covariance_mat)
+    except numpy.linalg.LinAlgError:
         L0, D = ldl(covariance_mat)
-        L = numpy.dot(L0, sqrt(D))
+        L = L0 * numpy.sqrt(D)
 
     if tags is None:
         tags = (None, ) * len(nom_values)
@@ -186,7 +186,7 @@ def ldl(A):
     if n != n_:
         raise numpy.linalg.LinAlgError('matrix must be square')
 
-    A = numpy.array(A, copy=True)
+    A = numpy.array(A, copy=True, dtype=numpy.float64)
     L = numpy.zeros_like(A) # we will only write in the lower half of L
     D = numpy.zeros(n)

From 7d6aec268f3317f0820110b5ea7181b4ff2d2b76 Mon Sep 17 00:00:00 2001
From: ces42
Date: Tue, 30 Jul 2019 18:40:25 +0200
Subject: [PATCH 4/9] fix error in last iteration of ldl

---
 uncertainties/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index 7a557d02..9924f00e 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -195,7 +195,7 @@ def ldl(A):
 
         a = A[i, i]
         l = A[i+1:, i]
-        if a < -EPS or (a <= 0 and abs(l).max() >= EPS):
+        if a < -EPS or (a <= 0 and len(l) > 0 and abs(l).max() >= EPS):
             raise numpy.linalg.LinAlgError('matrix must be positive '
                 'semidefinite (failed on %s-th diagonal entry)' % i)

From 77c103775248dda5b7c86367b6f812cacaa6db9d Mon Sep 17 00:00:00 2001
From: ces42
Date: Wed, 31 Jul 2019 00:51:26 +0200
Subject: [PATCH 5/9] rename `EPS` to `TOL`

---
 uncertainties/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index 9924f00e..25be26d0 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -180,7 +180,7 @@ def ldl(A):
 
     A -- a square symmetric positive semi-definite matrix
     """
-    EPS = 1.49e-8 # square root of float64 accuracy
+    TOL = 1.49e-8 # square root of float64 accuracy
 
     n, n_ = numpy.shape(A)
     if n != n_:

From e87eb43b59123f25a93f694b0d32ef41a5793545 Mon Sep 17 00:00:00 2001
From: ces42
Date: Thu, 1 Aug 2019 12:21:34 +0200
Subject: [PATCH 6/9] docstrings and comments

---
 uncertainties/core.py | 89 +++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 42 deletions(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index 25be26d0..79aa17ab 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -157,7 +157,7 @@ def correlated_values(nom_values, covariance_mat, tags=None):
 
     # We perform a Cholesky decomposition of the covariance matrix.
     # If the matrix is only positive semidefinite numpy will refuse to
-    # perform a Cholesky decomposition so we 'manually' do an LDL
+    # perform a Cholesky decomposition, so we 'manually' do an LDL
     # decomposition
     try:
         L = numpy.linalg.cholesky(covariance_mat)
@@ -165,9 +165,11 @@ def correlated_values(nom_values, covariance_mat, tags=None):
         L0, D = ldl(covariance_mat)
         L = L0 * numpy.sqrt(D)
 
+    # Creation of new, independent variables:
     if tags is None:
         tags = (None, ) * len(nom_values)
-    variables = [Variable(0, 1, tag) for tag in tags]
+    variables = tuple(Variable(0, 1, tag) for tag in tags)
+
     return nom_values + numpy.dot(L, variables)
 
 __all__.append('correlated_values')
@@ -175,12 +177,16 @@ def correlated_values(nom_values, covariance_mat, tags=None):
 def ldl(A):
     """
     Return the LDL factorisation of a symmetric, positive semidefinite
-    matrix. If the matrix is not square, symmetric or positive
+    matrix. This is a lower triangular matrix L and an array representing
+    the diagonal matrix D. If the matrix is not square or positive
     semi-definite, an error is raised.
 
-    A -- a square symmetric positive semi-definite matrix
+    A -- a square (symmetric) positive semi-definite matrix. Only the
+        lower half of A is read.
     """
-    TOL = 1.49e-8 # square root of float64 accuracy
+    # square root of float64 accuracy. In places where there should be
+    # a positive number we will only accept numbers larger than -TOL
+    TOL = 1.49e-8
@@ -195,7 +201,7 @@ def ldl(A):
 
         a = A[i, i]
         l = A[i+1:, i]
-        if a < -EPS or (a <= 0 and len(l) > 0 and abs(l).max() >= EPS):
+        if a < -TOL or (a <= 0 and len(l) > 0 and abs(l).max() >= TOL):
             raise numpy.linalg.LinAlgError('matrix must be positive '
                 'semidefinite (failed on %s-th diagonal entry)' % i)
@@ -224,10 +230,8 @@ def correlated_values_norm(values_with_std_dev, correlation_mat,
     deviation) pairs. The returned, correlated values have these
     nominal values and standard deviations.
 
-    correlation_mat -- correlation matrix between the given values, except
-    that any value with a 0 standard deviation must have its correlations
-    set to 0, with a diagonal element set to an arbitrary value (something
-    close to 0-1 is recommended, for a better numerical precision). When
+    correlation_mat -- correlation matrix between the given values. The
+    entries corresponding to values with 0 variance are ignored. When
     no value has a 0 variance, this is the covariance matrix normalized
     by standard deviations, and thus a symmetric matrix with ones on its
     diagonal. This matrix must be an NumPy array-like (list of lists,
@@ -243,42 +247,43 @@ def correlated_values_norm(values_with_std_dev, correlation_mat,
 
     (nominal_values, std_devs) = numpy.transpose(values_with_std_dev)
 
-    # We diagonalize the correlation matrix instead of the
-    # covariance matrix, because this is generally more stable
-    # numerically. In fact, the covariance matrix can have
-    # coefficients with arbitrary values, through changes of units
-    # of its input variables. This creates numerical instabilities.
-    #
-    # The covariance matrix is diagonalized in order to define
-    # the independent variables that model the given values:
-    (variances, transform) = numpy.linalg.eigh(correlation_mat)
+    # For values with zero uncertainty we ignore the corresponding entries
+    # in the correlation matrix:
+    zero_stdev = numpy.where(std_devs == 0)[0]
+    eff_corr_mat = numpy.delete(
+        numpy.delete(correlation_mat, zero_stdev, axis=0),
+        zero_stdev,
+        axis=1
+    )
 
-    # Numerical errors might make some variances negative: we set
-    # them to zero:
-    variances[variances < 0] = 0.
+    # We perform a Cholesky decomposition of the correlation matrix.
+    # If the matrix is only positive semidefinite numpy will refuse to
+    # perform a Cholesky decomposition, so we 'manually' do an LDL
+    # decomposition
+    try:
+        L = numpy.linalg.cholesky(eff_corr_mat)
+    except numpy.linalg.LinAlgError:
+        L0, D = ldl(eff_corr_mat)
+        L = L0 * numpy.sqrt(D)
 
     # Creation of new, independent variables:
-
-    # We use the fact that the eigenvectors in 'transform' are
-    # special: 'transform' is unitary: its inverse is its transpose:
-
-    variables = tuple(
+    eff_variables = tuple(
         # The variables represent "pure" uncertainties:
-        Variable(0, sqrt(variance), tag)
-        for (variance, tag) in zip(variances, tags))
-
-    # The coordinates of each new uncertainty as a function of the
-    # new variables must include the variable scale (standard deviation):
-    transform *= std_devs[:, numpy.newaxis]
-
-    # Representation of the initial correlated values:
-    values_funcs = tuple(
-        AffineScalarFunc(
-            value,
-            LinearCombination(dict(zip(variables, coords))))
-        for (coords, value) in zip(transform, nominal_values))
-
-    return values_funcs
+        Variable(0, 1, tag) for i, tag in enumerate(tags)
+        if std_devs[i] != 0
+    )
+    zero_stdev_variables = tuple(
+        Variable(0, 0, tag) for i, tag in enumerate(tags)
+        if std_devs[i] == 0
+    )
+
+    uncert = std_devs[std_devs != 0] * numpy.dot(L, eff_variables)
+    # We need to subtract arange(len(zero_stdev)) because the indices in
+    # zero_stdev refer to the original, full-length array:
+    uncert = numpy.insert(uncert, zero_stdev - numpy.arange(len(zero_stdev)),
+                          zero_stdev_variables)
+
+    return nominal_values + uncert
 
 __all__.append('correlated_values_norm')

From ae0298483b12a69e3a4fa3765e392778e579ce83 Mon Sep 17 00:00:00 2001
From: ces42
Date: Thu, 1 Aug 2019 12:32:11 +0200
Subject: [PATCH 7/9] allow passing an iterator as values_with_std_dev

---
 uncertainties/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index 79aa17ab..bdd80965 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -240,12 +240,12 @@ def correlated_values_norm(values_with_std_dev, correlation_mat,
     tags -- like for correlated_values().
     '''
 
+    (nominal_values, std_devs) = numpy.transpose(values_with_std_dev)
+
     # If no tags were given, we prepare tags for the newly created
     # variables:
     if tags is None:
-        tags = (None,) * len(values_with_std_dev)
-
-    (nominal_values, std_devs) = numpy.transpose(values_with_std_dev)
+        tags = (None,) * len(nominal_values)

From cf28d69431ebd76332930b232471514c54c501c3 Mon Sep 17 00:00:00 2001
From: ces42
Date: Thu, 1 Aug 2019 13:30:40 +0200
Subject: [PATCH 8/9] change error handling in ldl a bit

---
 uncertainties/core.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index bdd80965..f7689982 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -201,13 +201,14 @@ def ldl(A):
 
         a = A[i, i]
         l = A[i+1:, i]
-        if a < -TOL or (a <= 0 and len(l) > 0 and abs(l).max() >= TOL):
-            raise numpy.linalg.LinAlgError('matrix must be positive '
-                'semidefinite (failed on %s-th diagonal entry)' % i)
 
         if a <= 0:
+            if a < -TOL or (i < n - 1 and any(abs(l) >= TOL)):
+                raise numpy.linalg.LinAlgError('matrix must be positive '
+                    'semidefinite (failed on %s-th diagonal entry)' % i)
+            # If we get here, then the whole first column of A[i:, i:] is
+            # (nearly) zero
             D[i] = 0
-            continue
         else:
             D[i] = a
             L[i+1:, i] = l / a

From 1c0980441a18867057f34a486d2a8e0771da1a26 Mon Sep 17 00:00:00 2001
From: ces42
Date: Thu, 1 Aug 2019 13:39:20 +0200
Subject: [PATCH 9/9] cosmetic

---
 uncertainties/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uncertainties/core.py b/uncertainties/core.py
index f7689982..4a78a2a0 100644
--- a/uncertainties/core.py
+++ b/uncertainties/core.py
@@ -241,7 +241,7 @@ def correlated_values_norm(values_with_std_dev, correlation_mat,
     tags -- like for correlated_values().
     '''
 
-    (nominal_values, std_devs) = numpy.transpose(values_with_std_dev)
+    nominal_values, std_devs = numpy.transpose(values_with_std_dev)
 
     # If no tags were given, we prepare tags for the newly created
     # variables:
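
For reference, here is a minimal, self-contained sketch of the ldl() helper as it stands after PATCH 8/9, exercised on a hypothetical rank-deficient covariance matrix -- the degenerate case in which numpy.linalg.cholesky() raises LinAlgError and the series falls back to the LDL factorisation. The demo matrix `cov` and the assertions at the end are illustrative additions, not part of the patches.

import numpy

def ldl(A):
    # LDL^T factorisation of a square, symmetric, positive semi-definite
    # matrix A: returns (L, D) with L unit lower triangular and D a 1-D
    # array such that A == L @ diag(D) @ L.T. Mirrors the patched helper.
    TOL = 1.49e-8  # square root of float64 accuracy

    n, n_ = numpy.shape(A)
    if n != n_:
        raise numpy.linalg.LinAlgError('matrix must be square')

    A = numpy.array(A, copy=True, dtype=numpy.float64)
    L = numpy.zeros_like(A)  # only the lower half is written
    D = numpy.zeros(n)

    for i in range(n):
        L[i, i] = 1
        a = A[i, i]
        l = A[i+1:, i]
        if a <= 0:
            if a < -TOL or (i < n - 1 and any(abs(l) >= TOL)):
                raise numpy.linalg.LinAlgError(
                    'matrix must be positive semidefinite '
                    '(failed on %s-th diagonal entry)' % i)
            # The whole first column of A[i:, i:] is (nearly) zero.
            D[i] = 0
        else:
            D[i] = a
            L[i+1:, i] = l / a
            # Schur complement: eliminate row and column i.
            A[i+1:, i+1:] -= numpy.outer(l, l) / a

    return L, D

# Hypothetical singular covariance matrix: the third variable is the sum
# of the first two, so plain Cholesky fails but LDL succeeds.
cov = numpy.array([[1., 0., 1.],
                   [0., 4., 4.],
                   [1., 4., 5.]])

L0, D = ldl(cov)
assert numpy.allclose((L0 * D) @ L0.T, cov)  # L @ diag(D) @ L.T == cov

# L0 * sqrt(D) is the matrix 'square root' used by the patched
# correlated_values():
L = L0 * numpy.sqrt(D)
assert numpy.allclose(L @ L.T, cov)

With the series applied, uncertainties.correlated_values([1.0, 2.0, 3.0], cov) accepts this singular matrix directly, and the linear constraint is preserved exactly: for the returned x, y, z the combination x + y - z has (essentially) zero standard deviation.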