reduce flaky tests (#1015)

### Before submitting Please complete the following checklist when submitting a PR: - [ ] All new features must include a unit test. If you've fixed a bug or added code that should be tested, add a test to the [`tests`](../tests) directory! - [ ] All new functions and code must be clearly commented and documented. If you do make documentation changes, make sure that the docs build and render correctly by running `make docs`. - [ ] Ensure that the test suite passes, by running `make test`. - [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the change, and including a link back to the PR. - [x] Ensure that code is properly formatted by running `make format`. When all the above are checked, delete everything above the dashed line and fill in the pull request template. ------------------------------------------------------------------------------------------------------------ **Context:** The aim is to reduce the number of flaky tests and to address some of the stochastic test failures observed in CI. **Description of the Change:** 4 tests are updated: 1. test_shots_single_measure_obs ([TSSMO](https://github.com/PennyLaneAI/pennylane-lightning/blob/f9e8f62a073ab72c8d96b3bb01de399d02e288ba/tests/test_measurements.py#L753)) Previously a flaky test; the `flaky` marker is now removed and the shot count is increased 10x. Previous failures were due to the low shot count. Previous observed failure rate (without flaky): LQ 4/1000 Updated failure rate: LQ 0/1000 2. test_controlled_qubit_gates ([TCQG](https://github.com/PennyLaneAI/pennylane-lightning/blob/f9e8f62a073ab72c8d96b3bb01de399d02e288ba/tests/lightning_qubit/test_measurements_class.py#L742)) Previously a flaky test; the `flaky` marker is now removed. There is no non-determinism in the test. No failure. 3. test_cnot_controlled_qubit_unitary ([TCCQU](https://github.com/PennyLaneAI/pennylane-lightning/blob/f9e8f62a073ab72c8d96b3bb01de399d02e288ba/tests/lightning_qubit/test_measurements_class.py#L825)) Previously a flaky test; the `flaky` marker is now removed.
The only non-determinism is in the initial state preparation. The initial state is now fixed with a seed (already used in the above test). No failure. 4. test_sample_variations ([TSV](https://github.com/PennyLaneAI/pennylane-lightning/blob/f9e8f62a073ab72c8d96b3bb01de399d02e288ba/tests/test_measurements.py#L661)) Shots increased 10x, and the reference is now the analytical probability calculation in default.qubit rather than a shot-based estimate. Previous observed failure rate: 13/1000 Updated failure rate: LQ 0/1000 **Benefits:** - Reduced flaky tests from 6 to 3 - Reduced likelihood of TSV causing CI failure, which is frequently observed ([1](https://github.com/PennyLaneAI/pennylane-lightning/actions/runs/10887374615/job/30210122673) [2](https://github.com/PennyLaneAI/pennylane-lightning/actions/runs/11582353424/job/32245407148) [3](https://github.com/PennyLaneAI/pennylane-lightning/actions/runs/12131635773/job/33824313065) [4](https://github.com/PennyLaneAI/pennylane-lightning/actions/runs/12150423891/job/33887968196)) **Possible Drawbacks:** Increased test runtime: 1. TSSMO LQ x86: 5s -> 16s LG x86: 3s -> 6s LK x86 CPU: 1s -> 4s LK GPU: 1s -> 4s 2. TSV LQ x86: 3s -> 5s LG x86: 6s -> 10s LK x86 CPU: 1s -> 4s LK GPU: 1s -> 4s However, originally, the TSSMO runtime already increased whenever a flaky rerun was triggered, and a TSV failure would require re-running the tests. **Related GitHub Issues:** [sc-78212] --------- Co-authored-by: ringo-but-quantum <github-ringo-but-quantum@xanadu.ai>
PennyLaneAI · Dec 5, 2024 · ded6ef9 · ded6ef9
1 parent 705fe89
commit ded6ef9
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 12 deletions.
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
@@ -26,6 +26,9 @@
 
 ### Improvements
 
+* Reduce the number of flaky tests and increase the test shot counts.
+  [(#1015)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1015)
+
 * Update the logic for enabling `grad_on_execution` during device execution.
   [(#1016)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1016)
 

diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.40.0-dev28"
+__version__ = "0.40.0-dev29"
diff --git a/tests/lightning_qubit/test_measurements_class.py b/tests/lightning_qubit/test_measurements_class.py
@@ -707,7 +707,6 @@ def calculate_reference(tape):
         results = dev.execute(tapes)
         return transf_fn(results)
 
-    @flaky(max_runs=5)
     @pytest.mark.parametrize(
         "operation",
         [
@@ -819,7 +818,6 @@ def test_controlled_qubit_unitary_from_op(self, tol, lightning_sv):
 
         assert np.allclose(result, expected, tol)
 
-    @flaky(max_runs=5)
     @pytest.mark.parametrize("control_wires", range(4))
     @pytest.mark.parametrize("target_wires", range(4))
     def test_cnot_controlled_qubit_unitary(self, control_wires, target_wires, tol, lightning_sv):
@@ -831,6 +829,7 @@ def test_cnot_controlled_qubit_unitary(self, control_wires, target_wires, tol, l
         target_wires = [target_wires]
         wires = control_wires + target_wires
         U = qml.matrix(qml.PauliX(target_wires))
+        np.random.seed(0)
         init_state = np.random.rand(2**n_qubits) + 1.0j * np.random.rand(2**n_qubits)
         init_state /= np.linalg.norm(init_state)
 

diff --git a/tests/test_measurements.py b/tests/test_measurements.py
@@ -660,7 +660,7 @@ def test_sample_values(self, qubit_device, tol):
     @pytest.mark.parametrize("nwires", range(1, 11))
     def test_sample_variations(self, qubit_device, nwires, seed):
         """Tests if `sample(wires)` returns correct statistics."""
-        shots = 20000
+        shots = 200000
         n_qubits = max(5, nwires + 1)
         np.random.seed(seed)
         wires = qml.wires.Wires(np.random.permutation(nwires))
@@ -669,6 +669,7 @@ def test_sample_variations(self, qubit_device, nwires, seed):
         state /= np.linalg.norm(state)
         ops = [qml.StatePrep(state, wires=range(n_qubits))]
         tape = qml.tape.QuantumScript(ops, [qml.sample(wires=wires)], shots=shots)
+        tape_exact = qml.tape.QuantumScript(ops, [qml.probs(wires=wires)])
 
         def reshape_samples(samples):
             return np.atleast_3d(samples) if len(wires) == 1 else np.atleast_2d(samples)
@@ -679,13 +680,10 @@ def reshape_samples(samples):
             reshape_samples(samples), wire_order=wires
         )
 
-        dev = qml.device("default.qubit", wires=n_qubits, shots=shots)
-        samples = dev.execute(tape)
-        ref = qml.measurements.ProbabilityMP(wires=wires).process_samples(
-            reshape_samples(samples), wire_order=wires
-        )
+        dev_ref = qml.device("default.qubit", wires=n_qubits)
+        probs_ref = dev_ref.execute(tape_exact)
 
-        assert np.allclose(probs, ref, atol=2.0e-2, rtol=1.0e-4)
+        assert np.allclose(probs, probs_ref, atol=2.0e-2, rtol=1.0e-4)
 
 
 class TestWiresInVar:
@@ -734,8 +732,7 @@ def circuit2():
         assert np.allclose(circuit1(), circuit2(), atol=tol)
 
 
-@flaky(max_runs=5)
-@pytest.mark.parametrize("shots", [None, 10000, [10000, 11111]])
+@pytest.mark.parametrize("shots", [None, 100000, [100000, 111111]])
 @pytest.mark.parametrize("measure_f", [qml.counts, qml.expval, qml.probs, qml.sample, qml.var])
 @pytest.mark.parametrize(
     "obs",