From 0d9046abdecf9632808c15dd6ae5a1a1e2092bc5 Mon Sep 17 00:00:00 2001
From: Richard West <r.west@northeastern.edu>
Date: Sat, 30 Nov 2019 17:21:35 -0500
Subject: [PATCH 1/5] Added test_generate_reverse_rate_coefficient to
 MultiPDepArrhenius

This test currently fails for me, with the following traceback:
File "/Users/rwest/opt/anaconda3/envs/rmg3/lib/python3.7/site-packages/numpy/linalg/linalg.py", line 2156, in lstsq
    x, resids, rank, s = gufunc(a, b, rcond, signature=signature, extobj=extobj)
  File "/Users/rwest/opt/anaconda3/envs/rmg3/lib/python3.7/site-packages/numpy/linalg/linalg.py", line 101, in _raise_linalgerror_lstsq
    raise LinAlgError("SVD did not converge in Linear Least Squares")
numpy.linalg.linalg.LinAlgError: SVD did not converge in Linear Least Squares

See https://github.com/ReactionMechanismGenerator/RMG-Py/issues/1833
---
 rmgpy/kinetics/arrheniusTest.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
diff --git a/rmgpy/kinetics/arrheniusTest.py b/rmgpy/kinetics/arrheniusTest.py
index 21a1ec3a51..b3e54f888f 100644
--- a/rmgpy/kinetics/arrheniusTest.py
+++ b/rmgpy/kinetics/arrheniusTest.py
@@ -1020,3 +1020,36 @@ def test_change_rate(self):
         for T, kexp in zip(Tlist, k0list):
             kact = self.kinetics.get_rate_coefficient(T, 1e5)
             self.assertAlmostEqual(2 * kexp, kact, delta=1e-6 * kexp)
+
+    def test_generate_reverse_rate_coefficient(self):
+        """
+        Test reversing a MultiPDepArrhenius rate whose second term has negative A factors.
+
+        Real example from an imported Chemkin file; passes if no exception is raised.
+        """
+        from rmgpy.species import Species
+        from rmgpy.molecule import Molecule
+        from rmgpy.data.kinetics import LibraryReaction
+        from rmgpy.thermo import NASA, NASAPolynomial
+        test_reaction = LibraryReaction(reactants=[Species(label="C2H3", thermo=NASA(polynomials=[NASAPolynomial(coeffs=[3.12502,0.00235137,2.36803e-05,-3.35092e-08,1.39444e-11,34524.3,8.81538], Tmin=(200,"K"), Tmax=(1000,"K")), NASAPolynomial(coeffs=[4.37211,0.00746869,-2.64716e-06,4.22753e-10,-2.44958e-14,33805.2,0.428772], Tmin=(1000,"K"), Tmax=(6000,"K"))], Tmin=(200,"K"), Tmax=(6000,"K"), E0=(285.696,"kJ/mol"), Cp0=(33.2579,"J/mol/K"), CpInf=(108.088,"J/mol/K"), comment="""ATcT3E\nC2H3 <g> ATcT ver. 1.122, DHf298 = 296.91 ± 0.33 kJ/mol - fit JAN17"""), molecule=[Molecule(smiles="[CH]=C")], molecular_weight=(27.0452,"amu")), 
+                                                  Species(label="CH2O", thermo=NASA(polynomials=[NASAPolynomial(coeffs=[4.77187,-0.00976266,3.70122e-05,-3.76922e-08,1.31327e-11,-14379.8,0.696586], Tmin=(200,"K"), Tmax=(1000,"K")), NASAPolynomial(coeffs=[2.91333,0.0067004,-2.55521e-06,4.27795e-10,-2.44073e-14,-14462.2,7.43823], Tmin=(1000,"K"), Tmax=(6000,"K"))], Tmin=(200,"K"), Tmax=(6000,"K"), E0=(-119.527,"kJ/mol"), Cp0=(33.2579,"J/mol/K"), CpInf=(83.1447,"J/mol/K"), comment="""ATcT3E\nH2CO <g> ATcT ver. 1.122, DHf298 = -109.188 ± 0.099 kJ/mol - fit JAN17"""), molecule=[Molecule(smiles="C=O")], molecular_weight=(30.026,"amu"))], 
+                                        products=[Species(label="C2H4", thermo=NASA(polynomials=[NASAPolynomial(coeffs=[3.65151,-0.00535067,5.16486e-05,-6.36869e-08,2.50743e-11,5114.51,5.38561], Tmin=(200,"K"), Tmax=(1000,"K")), NASAPolynomial(coeffs=[4.14446,0.0102648,-3.61247e-06,5.74009e-10,-3.39296e-14,4190.59,-1.14778], Tmin=(1000,"K"), Tmax=(6000,"K"))], Tmin=(200,"K"), Tmax=(6000,"K"), E0=(42.06,"kJ/mol"), Cp0=(33.2579,"J/mol/K"), CpInf=(133.032,"J/mol/K"), comment="""ATcT3E\nC2H4 <g> ATcT ver. 1.122, DHf298 = 52.45 ± 0.13 kJ/mol - fit JAN17"""), molecule=[Molecule(smiles="C=C")], molecular_weight=(28.0532,"amu")), 
+                                                  Species(label="HCO", thermo=NASA(polynomials=[NASAPolynomial(coeffs=[3.97075,-0.00149122,9.54042e-06,-8.8272e-09,2.67645e-12,3842.03,4.4466], Tmin=(200,"K"), Tmax=(1000,"K")), NASAPolynomial(coeffs=[3.85781,0.00264114,-7.44177e-07,1.23313e-10,-8.88959e-15,3616.43,3.92451], Tmin=(1000,"K"), Tmax=(6000,"K"))], Tmin=(200,"K"), Tmax=(6000,"K"), E0=(32.0237,"kJ/mol"), Cp0=(33.2579,"J/mol/K"), CpInf=(58.2013,"J/mol/K"), comment="""HCO <g> ATcT ver. 1.122, DHf298 = 41.803 ± 0.099 kJ/mol - fit JAN17"""), molecule=[Molecule(smiles="[CH]=O")], molecular_weight=(29.018,"amu"))], 
+                                        kinetics=MultiPDepArrhenius(arrhenius=[PDepArrhenius(pressures=([0.001,0.01,0.1,1,10,100,1000],"atm"), 
+                                                                                             arrhenius=[Arrhenius(A=(1.1e+07,"cm^3/(mol*s)"), n=1.09, Ea=(1807,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(2.5e+07,"cm^3/(mol*s)"), n=0.993, Ea=(1995,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(2.5e+08,"cm^3/(mol*s)"), n=0.704, Ea=(2596,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(1.4e+10,"cm^3/(mol*s)"), n=0.209, Ea=(3934,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(3.5e+13,"cm^3/(mol*s)"), n=-0.726, Ea=(6944,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(3.3e+14,"cm^3/(mol*s)"), n=-0.866, Ea=(10966,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(17,"cm^3/(mol*s)"), n=3.17, Ea=(9400,"cal/mol"), T0=(1,"K"))]), 
+                                                                               PDepArrhenius(pressures=([0.001,0.01,0.1,1,10,100,1000],"atm"), 
+                                                                                             arrhenius=[Arrhenius(A=(-2.3e+16,"cm^3/(mol*s)"), n=-1.269, Ea=(20617,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(-5.2e+16,"cm^3/(mol*s)"), n=-1.366, Ea=(20805,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(-1.5e+18,"cm^3/(mol*s)"), n=-1.769, Ea=(22524,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(-8.5e+19,"cm^3/(mol*s)"), n=-2.264, Ea=(23862,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(-4.4e+23,"cm^3/(mol*s)"), n=-3.278, Ea=(27795,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(-4.2e+24,"cm^3/(mol*s)"), n=-3.418, Ea=(31817,"cal/mol"), T0=(1,"K")), 
+                                                                                                        Arrhenius(A=(-2.1e+11,"cm^3/(mol*s)"), n=0.618, Ea=(30251,"cal/mol"), T0=(1,"K"))])
+                                                                                                    ]), duplicate=True)
+        test_reaction.generate_reverse_rate_coefficient()  # no assertion: the test passes if this call does not raise

From c49521f1e5b751cfb9c21efbf1de961125a06b89 Mon Sep 17 00:00:00 2001
From: Richard West <r.west@northeastern.edu>
Date: Sat, 30 Nov 2019 23:19:45 -0500
Subject: [PATCH 2/5] Switch from numpy to scipy for linalg.lstsq

The scipy version allows you to change the algorithm used,
to work around some bugs in certain versions of MKL.
---
 rmgpy/kinetics/arrhenius.pyx | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/rmgpy/kinetics/arrhenius.pyx b/rmgpy/kinetics/arrhenius.pyx
index ea0f58b9d7..3352f49f9b 100644
--- a/rmgpy/kinetics/arrhenius.pyx
+++ b/rmgpy/kinetics/arrhenius.pyx
@@ -38,8 +38,6 @@ from rmgpy.exceptions import KineticsError
 from rmgpy.kinetics.uncertainties import rank_accuracy_map
 from rmgpy.molecule.molecule import Bond
 
-# Prior to numpy 1.14, `numpy.linalg.lstsq` does not accept None as a value
-RCOND = -1 if int(np.__version__.split('.')[1]) < 14 else None
 ################################################################################
 
 cdef class Arrhenius(KineticsModel):
@@ -152,7 +150,7 @@ cdef class Arrhenius(KineticsModel):
         data.
         """
         import scipy.stats
-
+        import scipy.linalg
         assert len(Tlist) == len(klist), "length of temperatures and rates must be the same"
         if len(Tlist) < 3 + three_params:
             raise KineticsError('Not enough degrees of freedom to fit this Arrhenius expression')
@@ -170,11 +168,14 @@ cdef class Arrhenius(KineticsModel):
             for n in range(b.size):
                 A[n, :] *= weights[n]
                 b[n] *= weights[n]
-        x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)
+        try:
+            x, residues, rank, s = scipy.linalg.lstsq(A, b)
+        except:
+            x, residues, rank, s = scipy.linalg.lstsq(A, b, lapack_driver='gelss')
 
         # Determine covarianace matrix to obtain parameter uncertainties
         count = klist.size
-        cov = residues[0] / (count - 3) * np.linalg.inv(np.dot(A.T, A))
+        cov = residues / (count - 3) * np.linalg.inv(np.dot(A.T, A))
         t = scipy.stats.t.ppf(0.975, count - 3)
 
         if not three_params:

From 8a010f6cddf63865b1dc39d459d9a8fb4fb928ba Mon Sep 17 00:00:00 2001
From: Richard West <r.west@northeastern.edu>
Date: Sat, 30 Nov 2019 23:35:07 -0500
Subject: [PATCH 3/5] Allow Arrhenius fitting of negative rates.

Some MultiArrhenius and MultiPDepArrhenius expressions
use negative A factors for one of the expressions.
This is permitted as long as the overall sum is positive.

This commit allows the Arrhenius.fit_to_data to work
with negative rates.

Probably solves #1833
---
 rmgpy/kinetics/arrhenius.pyx | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/rmgpy/kinetics/arrhenius.pyx b/rmgpy/kinetics/arrhenius.pyx
index 3352f49f9b..deba61962a 100644
--- a/rmgpy/kinetics/arrhenius.pyx
+++ b/rmgpy/kinetics/arrhenius.pyx
@@ -151,6 +151,16 @@ cdef class Arrhenius(KineticsModel):
         """
         import scipy.stats
         import scipy.linalg
+        if not all(np.isfinite(klist)):
+            raise  ValueError("Rates must all be finite, not inf or NaN")
+        if any(klist<0):
+            if not all(klist<0):
+                raise ValueError("Rates must all be positive or all be negative.")
+            rate_sign_multiplier = -1
+            klist = -1 * klist
+        else:
+            rate_sign_multiplier = 1
+
         assert len(Tlist) == len(klist), "length of temperatures and rates must be the same"
         if len(Tlist) < 3 + three_params:
             raise KineticsError('Not enough degrees of freedom to fit this Arrhenius expression')
@@ -182,7 +192,7 @@ cdef class Arrhenius(KineticsModel):
             x = np.array([x[0], 0, x[1]])
             cov = np.array([[cov[0, 0], 0, cov[0, 1]], [0, 0, 0], [cov[1, 0], 0, cov[1, 1]]])
 
-        self.A = (exp(x[0]), kunits)
+        self.A = (rate_sign_multiplier * exp(x[0]), kunits)
         self.n = x[1]
         self.Ea = (x[2] * 0.001, "kJ/mol")
         self.T0 = (T0, "K")

From b0d62f503def13f219dc48b87c24f3f3bad53586 Mon Sep 17 00:00:00 2001
From: Richard West <r.west@northeastern.edu>
Date: Sat, 30 Nov 2019 23:37:33 -0500
Subject: [PATCH 4/5] Revert "Switch from numpy to scipy for linalg.lstsq"

This reverts commit c49521f1e5b751cfb9c21efbf1de961125a06b89.

Having fixed the problem of trying to take logarithms of negative rates,
I think the numpy algorithm is probably robust enough, and there
is no need to use the scipy version. Though I have no idea which is "best".
---
 rmgpy/kinetics/arrhenius.pyx | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/rmgpy/kinetics/arrhenius.pyx b/rmgpy/kinetics/arrhenius.pyx
index deba61962a..ae519ddbd9 100644
--- a/rmgpy/kinetics/arrhenius.pyx
+++ b/rmgpy/kinetics/arrhenius.pyx
@@ -38,6 +38,8 @@ from rmgpy.exceptions import KineticsError
 from rmgpy.kinetics.uncertainties import rank_accuracy_map
 from rmgpy.molecule.molecule import Bond
 
+# Prior to numpy 1.14, `numpy.linalg.lstsq` does not accept None as a value
+RCOND = -1 if int(np.__version__.split('.')[1]) < 14 else None
 ################################################################################
 
 cdef class Arrhenius(KineticsModel):
@@ -150,7 +152,6 @@ cdef class Arrhenius(KineticsModel):
         data.
         """
         import scipy.stats
-        import scipy.linalg
         if not all(np.isfinite(klist)):
             raise  ValueError("Rates must all be finite, not inf or NaN")
         if any(klist<0):
@@ -178,14 +179,11 @@ cdef class Arrhenius(KineticsModel):
             for n in range(b.size):
                 A[n, :] *= weights[n]
                 b[n] *= weights[n]
-        try:
-            x, residues, rank, s = scipy.linalg.lstsq(A, b)
-        except:
-            x, residues, rank, s = scipy.linalg.lstsq(A, b, lapack_driver='gelss')
+        x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)
 
         # Determine covarianace matrix to obtain parameter uncertainties
         count = klist.size
-        cov = residues / (count - 3) * np.linalg.inv(np.dot(A.T, A))
+        cov = residues[0] / (count - 3) * np.linalg.inv(np.dot(A.T, A))
         t = scipy.stats.t.ppf(0.975, count - 3)
 
         if not three_params:

From 523cf4842fc44f9b43c25c919dd78d3862a2ca24 Mon Sep 17 00:00:00 2001
From: Richard West <r.west@northeastern.edu>
Date: Sat, 30 Nov 2019 23:44:46 -0500
Subject: [PATCH 5/5] Added test for Arrhenius.fit_to_data on negative rates.

Sometimes you have negative k and want a negative A...
(as part of a MultiArrhenius expression)
---
 rmgpy/kinetics/arrheniusTest.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/rmgpy/kinetics/arrheniusTest.py b/rmgpy/kinetics/arrheniusTest.py
index b3e54f888f..ceac23eebc 100644
--- a/rmgpy/kinetics/arrheniusTest.py
+++ b/rmgpy/kinetics/arrheniusTest.py
@@ -158,6 +158,21 @@ def test_fit_to_data(self):
         self.assertAlmostEqual(arrhenius.Ea.value_si, self.arrhenius.Ea.value_si, 2)
         self.assertAlmostEqual(arrhenius.T0.value_si, self.arrhenius.T0.value_si, 4)
 
+    def test_fit_to_negative_data(self):
+        """
+        Test that Arrhenius.fit_to_data() fits all-negative rates and yields a negative A factor.
+        """
+        Tdata = np.array([300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500])
+        kdata = np.array([-1 * self.arrhenius.get_rate_coefficient(T) for T in Tdata])  # negated reference rates
+        arrhenius = Arrhenius().fit_to_data(Tdata, kdata, kunits="m^3/(mol*s)")
+        self.assertEqual(float(self.arrhenius.T0.value_si), 1)  # NOTE(review): checks the fixture's T0, not the fitted one
+        for T, k in zip(Tdata, kdata):
+            self.assertAlmostEqual(k, arrhenius.get_rate_coefficient(T), delta=1e-6 * abs(k))  # k < 0, so delta needs abs(k)
+        self.assertAlmostEqual(arrhenius.A.value_si, -1 * self.arrhenius.A.value_si, delta=1e0)
+        self.assertAlmostEqual(arrhenius.n.value_si, self.arrhenius.n.value_si, places=1)  # a 4th positional arg would be `msg`, not a tolerance
+        self.assertAlmostEqual(arrhenius.Ea.value_si, self.arrhenius.Ea.value_si, 2)
+        self.assertAlmostEqual(arrhenius.T0.value_si, self.arrhenius.T0.value_si, 4)
+
     def test_pickle(self):
         """
         Test that an Arrhenius object can be pickled and unpickled with no loss