diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index d8894ed..7d85ea5 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -55,9 +55,9 @@ def run_sim_batch(oq3_prog, sim, shots): return -device_ids = ("braket_sv", "braket_sv_v2", "braket_dm", "braket_dm_v2") +device_ids = ("sv", "dm") -generators = (ghz, qft) +generators = ("ghz", "qft") @pytest.mark.parametrize("device_id", device_ids) @@ -65,19 +65,14 @@ def run_sim_batch(oq3_prog, sim, shots): @pytest.mark.parametrize("exact_results", exact_shots_results) @pytest.mark.parametrize("circuit", generators) def test_exact_shots(benchmark, device_id, nq, exact_results, circuit): - if device_id in ("braket_dm_v2", "braket_dm") and ( - exact_results in ("state_vector",) or nq > 10 - ): - pytest.skip() - if ( - device_id in ("braket_sv",) - and exact_results in ("density_matrix q[0], q[1]",) - and nq >= 17 - ): + if device_id == "dm" and (exact_results in ("state_vector",) or nq > 10): pytest.skip() result_type = exact_results - oq3_prog = Program(source=circuit(nq, result_type)) - sim = LocalSimulator(device_id) + if circuit == "qft": + oq3_prog = Program(source=qft(nq, result_type)) + elif circuit == "ghz": + oq3_prog = Program(source=ghz(nq, result_type)) + sim = LocalSimulator(f"braket_{device_id}_v2") benchmark.pedantic(run_sim, args=(oq3_prog, sim, 0), iterations=5, warmup_rounds=1) @@ -89,17 +84,16 @@ def test_exact_shots(benchmark, device_id, nq, exact_results, circuit): def test_exact_shots_batched( benchmark, device_id, nq, batch_size, exact_results, circuit ): - if device_id in ("braket_dm_v2", "braket_dm") and ( - exact_results in ("state_vector,") or nq >= 5 - ): - pytest.skip() - if nq >= 10: + if ( + device_id == "dm" and (exact_results in ("state_vector,") or nq >= 5) + ) or nq >= 15: pytest.skip() - # skip all for now as this is very expensive - pytest.skip() result_type = exact_results - oq3_prog = [Program(source=circuit(nq, result_type)) for _ in range(batch_size)] - sim = LocalSimulator(device_id) + if circuit == "qft": + oq3_prog = [Program(source=qft(nq, result_type)) for _ in range(batch_size)] + elif circuit == "ghz": + oq3_prog = [Program(source=ghz(nq, result_type)) for _ in range(batch_size)] + sim = LocalSimulator(f"braket_{device_id}_v2") benchmark.pedantic( run_sim_batch, args=(oq3_prog, sim, 0), iterations=5, warmup_rounds=1 ) @@ -114,11 +108,14 @@ def test_exact_shots_batched( @pytest.mark.parametrize("nonzero_shots_results", nonzero_shots_results) @pytest.mark.parametrize("circuit", generators) def test_nonzero_shots(benchmark, device_id, nq, shots, nonzero_shots_results, circuit): - if device_id in ("braket_dm_v2", "braket_dm") and nq > 10: + if device_id in ("dm",) and nq > 10: pytest.skip() result_type = nonzero_shots_results - oq3_prog = Program(source=circuit(nq, result_type)) - sim = LocalSimulator(device_id) + if circuit == "qft": + oq3_prog = Program(source=qft(nq, result_type)) + elif circuit == "ghz": + oq3_prog = Program(source=ghz(nq, result_type)) + sim = LocalSimulator(f"braket_{device_id}_v2") benchmark.pedantic( run_sim, args=(oq3_prog, sim, shots), iterations=5, warmup_rounds=1 ) @@ -134,17 +131,17 @@ def test_nonzero_shots(benchmark, device_id, nq, shots, nonzero_shots_results, c def test_nonzero_shots_batched( benchmark, device_id, nq, batch_size, shots, nonzero_shots_results, circuit ): - if device_id in ("braket_dm_v2", "braket_dm") and nq >= 5: + if device_id in ("dm") and nq >= 5: pytest.skip() if nq >= 10: pytest.skip() - # skip all for now as this is very expensive - pytest.skip() - result_type = nonzero_shots_results - oq3_prog = [Program(source=circuit(nq, result_type)) for _ in range(batch_size)] - sim = LocalSimulator(device_id) + if circuit == "qft": + oq3_prog = [Program(source=qft(nq, result_type)) for _ in range(batch_size)] + elif circuit == "ghz": + oq3_prog = [Program(source=ghz(nq, result_type)) for _ in range(batch_size)] + sim = LocalSimulator(f"braket_{device_id}_v2") benchmark.pedantic( run_sim_batch, args=(oq3_prog, sim, shots), iterations=5, warmup_rounds=1 ) diff --git a/benchmark/pl_benchmark.py b/benchmark/pl_benchmark.py new file mode 100644 index 0000000..93a1179 --- /dev/null +++ b/benchmark/pl_benchmark.py @@ -0,0 +1,83 @@ +import networkx as nx +import numpy as np +import pennylane as qml +import pytest + +# always the same for repeatability +np.random.seed(0x1C2C6D66) +n_qubits = range(3, 16) +n_layers = range(1, 5) +shots = (100,) + + +def make_wide_tapes(nq: int, nl: int, shots: int): + gamma = 0.2 + alpha = 0.4 + p = 0.5 + seed = 42 + g = nx.erdos_renyi_graph(nq, p=p, seed=seed) + cost_h, mixer_h = qml.qaoa.max_clique(g, constrained=False) + ops = [qml.Hadamard(i) for i in range(nq)] + for layer in range(nl): + cl_op = qml.templates.ApproxTimeEvolution(cost_h, gamma, 1) + ops.append(cl_op) + ml_op = qml.templates.ApproxTimeEvolution(mixer_h, alpha, 1) + ops.append(ml_op) + + measurements = [qml.expval(o) for (c, o) in zip(cost_h.coeffs, cost_h.ops)] + tapes = [qml.tape.QuantumTape(ops, measurements, shots=shots)] + wider_tapes = [t.expand(depth=5) for t in tapes] + return wider_tapes + + +def make_qiskit_tapes(nq: int, nl: int, shots: int): + wide_tapes = make_wide_tapes(nq, nl, shots) + qiskit_sim = qml.device( + "qiskit.aer", + backend="aer_simulator_statevector", + wires=nq, + shots=shots, + statevector_parallel_threshold=8, + ) + qiskit_tapes = qiskit_sim.compile_circuits(wide_tapes) + return qiskit_tapes + + +@pytest.mark.parametrize("shots", shots) +@pytest.mark.parametrize("n_layers", n_layers) +@pytest.mark.parametrize("nq", n_qubits) +def test_sim_aer(benchmark, shots, n_layers, nq): + tapes = make_qiskit_tapes(nq, n_layers, shots) + sim = qml.device( + "qiskit.aer", backend="aer_simulator_statevector", wires=nq, shots=shots + ) + benchmark.pedantic(sim.execute, args=(tapes,), iterations=5, warmup_rounds=1) + + +@pytest.mark.parametrize("shots", shots) +@pytest.mark.parametrize("n_layers", n_layers) +@pytest.mark.parametrize("nq", n_qubits) +def test_sim_v2(benchmark, shots, n_layers, nq): + tapes = make_wide_tapes(nq, n_layers, shots) + sim = qml.device( + "braket.local.qubit", backend="braket_sv_v2", wires=nq, shots=shots + ) + benchmark.pedantic(sim.execute, args=(tapes,), iterations=5, warmup_rounds=1) + + +@pytest.mark.parametrize("shots", shots) +@pytest.mark.parametrize("n_layers", n_layers) +@pytest.mark.parametrize("nq", n_qubits) +def test_sim_v1(benchmark, shots, n_layers, nq): + tapes = make_wide_tapes(nq, n_layers, shots) + sim = qml.device("braket.local.qubit", backend="braket_sv", wires=nq, shots=shots) + benchmark.pedantic(sim.execute, args=(tapes,), iterations=5, warmup_rounds=1) + + +@pytest.mark.parametrize("shots", shots) +@pytest.mark.parametrize("n_layers", n_layers) +@pytest.mark.parametrize("nq", n_qubits) +def test_sim_lightning(benchmark, shots, n_layers, nq): + tapes = make_wide_tapes(nq, n_layers, shots) + sim = qml.device("lightning.qubit", wires=nq, shots=shots) + benchmark.pedantic(sim.execute, args=(tapes,), iterations=5, warmup_rounds=1) diff --git a/src/braket/juliapkg.json b/src/braket/juliapkg.json index 407b497..6c09e8d 100644 --- a/src/braket/juliapkg.json +++ b/src/braket/juliapkg.json @@ -1,9 +1,9 @@ { - "julia": "1.10", + "julia": "1.11", "packages": { "BraketSimulator": { "uuid": "76d27892-9a0b-406c-98e4-7c178e9b3dff", - "version": "0.0.5" + "version": "0.0.7" }, "JSON3": { "uuid": "0f8b85d8-7281-11e9-16c2-39a750bddbf1", diff --git a/src/braket/simulator_v2/base_simulator_v2.py b/src/braket/simulator_v2/base_simulator_v2.py index faf0799..9fbb580 100644 --- a/src/braket/simulator_v2/base_simulator_v2.py +++ b/src/braket/simulator_v2/base_simulator_v2.py @@ -24,7 +24,7 @@ def setup_julia(): import sys # don't reimport if we don't have to - if "juliacall" in sys.modules: + if "juliacall" in sys.modules and hasattr(sys.modules["juliacall"], "Main"): os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "yes" return else: @@ -37,26 +37,67 @@ def setup_julia(): ): os.environ[k] = os.environ.get(k, default) - import juliacall + from juliacall import Main as jl - jl = juliacall.Main + # These are used at simulator class instantiation to trigger + # precompilation of Julia methods which may be invalidated + # or uncacheable. Total time for this should be <1s. jl.seval("using BraketSimulator, JSON3") - stock_oq3 = """ + exact_sv_oq3 = """ OPENQASM 3.0; + input float p; qubit[2] q; h q[0]; cphaseshift(1.5707963267948966) q[1], q[0]; + rx(1.5707963267948966) q[0]; + ry(1.5707963267948966) q[0]; + rz(p) q[0]; + rz(p) q[0]; + ry(1) q[1]; + rx(0) q[1]; + rz(2) q[1]; cnot q; - #pragma braket noise bit_flip(0.1) q[0] #pragma braket result variance y(q[0]) + #pragma braket result expectation y(q[0]) + #pragma braket result expectation y(q[0]) @ z(q[1]) + #pragma braket result expectation z(q[0]) @ z(q[1]) #pragma braket result density_matrix q[0], q[1] #pragma braket result probability """ - jl.BraketSimulator.simulate("braket_dm_v2", stock_oq3, "{}", 0) + inexact_sv_oq3 = """ + OPENQASM 3.0; + input float p; + qubit[9] q; + h q; + #pragma braket result variance y(q[0]) + #pragma braket result expectation z(q[1]) + #pragma braket result expectation z(q[1]) @ z(q[2]) + #pragma braket result expectation x(q[3]) @ x(q[4]) + #pragma braket result expectation y(q[5]) @ y(q[6]) + #pragma braket result expectation h(q[7]) @ h(q[8]) + """ + stock_dm_oq3 = """ + OPENQASM 3.0; + input float p; + qubit[2] q; + h q[0]; + #pragma braket noise bit_flip(0.1) q[0] + #pragma braket noise phase_flip(0.1) q[0] + #pragma braket result variance y(q[0]) + #pragma braket result expectation y(q[0]) + #pragma braket result density_matrix q[0], q[1] + """ + jl.BraketSimulator.simulate("braket_sv_v2", exact_sv_oq3, '{"p": 1.57}', 0) + jl.BraketSimulator.simulate("braket_sv_v2", inexact_sv_oq3, '{"p": 1.57}', 100) + jl.BraketSimulator.simulate("braket_dm_v2", stock_dm_oq3, '{"p": 1.57}', 0) return def setup_pool(): + # We use a multiprocessing Pool with one worker + # in order to bypass the Python GIL. This protects us + # when the simulator is used from a non-main thread from another + # Python module, as occurs in the Qiskit-Braket plugin. global __JULIA_POOL__ __JULIA_POOL__ = Pool(processes=1) __JULIA_POOL__.apply(setup_julia) @@ -65,6 +106,11 @@ def setup_pool(): return +# large arrays are extremely expensive to transfer among Python +# processes because they are pickle'd. For large arrays like for +# StateVector, DensityMatrix, or Probability result types, we +# instead do an mmap to disk, which is dramatically faster. For +# smaller objects this isn't helpful. def _handle_mmaped_result(raw_result, mmap_paths, obj_lengths): result = GateModelTaskResult(**raw_result) if mmap_paths: @@ -91,6 +137,8 @@ def _handle_mmaped_result(raw_result, mmap_paths, obj_lengths): class BaseLocalSimulatorV2(BaseLocalSimulator): def __init__(self, device: str): global __JULIA_POOL__ + # if the pool is already set up, no need + # to do anything if __JULIA_POOL__ is None: setup_pool() self._device = device @@ -120,10 +168,13 @@ def run_openqasm( are requested when shots>0. """ global __JULIA_POOL__ + + # pass inputs and source as strings to avoid pickling a dict + inputs_dict = json.dumps(openqasm_ir.inputs) if openqasm_ir.inputs else "{}" try: jl_result = __JULIA_POOL__.apply( translate_and_run, - [self._device, openqasm_ir, shots], + [self._device, openqasm_ir.source, inputs_dict, shots], ) except Exception as e: _handle_julia_error(e) @@ -134,6 +185,7 @@ def run_openqasm( # attach the result types if not shots: + # have to convert the types of array result types to what the BDK expects result = _result_value_to_ndarray(result) else: result.resultTypes = [rt.type for rt in result.resultTypes] diff --git a/src/braket/simulator_v2/julia_workers.py b/src/braket/simulator_v2/julia_workers.py index f85289e..ba329d9 100644 --- a/src/braket/simulator_v2/julia_workers.py +++ b/src/braket/simulator_v2/julia_workers.py @@ -8,7 +8,6 @@ def _handle_julia_error(error): # in case juliacall isn't loaded - print(error) if type(error).__name__ == "JuliaError": python_exception = getattr(error.exception, "alternate_type", None) if python_exception is None: @@ -27,23 +26,18 @@ def _handle_julia_error(error): def translate_and_run( - device_id: str, openqasm_ir: OpenQASMProgram, shots: int = 0 + device_id: str, openqasm_source: str, openqasm_inputs: str, shots: int = 0 ) -> str: - jl = sys.modules["juliacall"].Main - jl.GC.enable(False) - jl_inputs = json.dumps(openqasm_ir.inputs) if openqasm_ir.inputs else "{}" + jl = getattr(sys.modules["juliacall"], "Main") try: result = jl.BraketSimulator.simulate( device_id, - openqasm_ir.source, - jl_inputs, + openqasm_source, + openqasm_inputs, shots, ) - except Exception as e: _handle_julia_error(e) - finally: - jl.GC.enable(True) return result @@ -55,7 +49,7 @@ def translate_and_run_multiple( inputs: Optional[Union[dict, Sequence[dict]]] = None, ) -> List[str]: inputs = inputs or {} - jl = sys.modules["juliacall"].Main + jl = getattr(sys.modules["juliacall"], "Main") irs = [program.source for program in programs] py_inputs = {} if len(inputs) > 1 or isinstance(inputs, dict):