From 0cca1e32209498cc0b07db5167354cca84d5424c Mon Sep 17 00:00:00 2001 From: Samuel F Antao Date: Thu, 31 Aug 2023 03:17:29 +0100 Subject: [PATCH 01/63] Make vector allocation aligned to 64-bytes (#1909) * Make vector allocation aligned to 64-bytes as that is the same alignement requirement used by State data. * Add release note. --------- Co-authored-by: Jun Doi --- .../fix-aer-vector-alignment-aace6e14342c002e.yaml | 10 ++++++++++ src/framework/linalg/vector.hpp | 8 ++++++++ 2 files changed, 18 insertions(+) create mode 100644 releasenotes/notes/fix-aer-vector-alignment-aace6e14342c002e.yaml diff --git a/releasenotes/notes/fix-aer-vector-alignment-aace6e14342c002e.yaml b/releasenotes/notes/fix-aer-vector-alignment-aace6e14342c002e.yaml new file mode 100644 index 0000000000..0434e30fc7 --- /dev/null +++ b/releasenotes/notes/fix-aer-vector-alignment-aace6e14342c002e.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Change ``AER::Vector`` object alignment to 64-byte. In some cases, it is used to + initialize ``AER::QV::QubitVector`` objects by moving storage ownership to these + objects. As the code assumes that ``AER::QV::QubitVector`` storage is at least + 32-byte aligned for AVX2 load instructions, this change enforces the same alignment + requirements for both ``AER::Vector`` and ``AER::QV::QubitVector`` objects so that + one doesn't get into segmentation faults. + \ No newline at end of file diff --git a/src/framework/linalg/vector.hpp b/src/framework/linalg/vector.hpp index 985b64f5b3..21cd0c9a7e 100644 --- a/src/framework/linalg/vector.hpp +++ b/src/framework/linalg/vector.hpp @@ -31,7 +31,15 @@ namespace AER { template T *malloc_data(size_t size) { +#if !defined(_WIN64) && !defined(_WIN32) + // Data allocated here may need to be properly aligned to be compliant with + // AVX2. 
+ void *data = nullptr; + posix_memalign(&data, 64, sizeof(T) * size); + return reinterpret_cast(data); +#else return reinterpret_cast(malloc(sizeof(T) * size)); +#endif } template From f9a6691269397f41db6934927d344e0f8076feea Mon Sep 17 00:00:00 2001 From: Samuel F Antao Date: Thu, 31 Aug 2023 07:02:16 +0100 Subject: [PATCH 02/63] Define environment variable to allow Qiskit-Aer to be built without CUDA requirements (#1910) * Define enviorment variable to allow Qiskit-Aer to be built without CUDA requirements. * Add release note. --------- Co-authored-by: Jun Doi --- ...ip-cuda-requirements-927ddce79b9e7108.yaml | 23 +++++++++++++++++++ setup.py | 10 +++++++- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/skip-cuda-requirements-927ddce79b9e7108.yaml diff --git a/releasenotes/notes/skip-cuda-requirements-927ddce79b9e7108.yaml b/releasenotes/notes/skip-cuda-requirements-927ddce79b9e7108.yaml new file mode 100644 index 0000000000..77ead1090e --- /dev/null +++ b/releasenotes/notes/skip-cuda-requirements-927ddce79b9e7108.yaml @@ -0,0 +1,23 @@ +--- +prelude: > + Build environment variable was added to enable building Qiskit-Aer without the CUDA + requirements. The new variable is ``QISKIT_ADD_CUDA_REQUIREMENTS`` and can be set to + False/No/Off or True/Yes/On. By default, it is assumed True. + +features: + - | + A new environment variable ``QISKIT_ADD_CUDA_REQUIREMENTS`` can be used to control + whether or not to build the Python package for Qiskit-Aer with CUDA requirements. This + flag can be set to False/No/Off or True/Yes/On. By default it is assumed True. This + is useful in case a CUDA installation is already available on the system where + Qiskit-Aer will run. Not including the requirements results in a smaller footprint + and facilitates leveraging different CUDA installs for development purposes. 
+ The new flag can be used like:: + + cd + + QISKIT_AER_PACKAGE_NAME='qiskit-aer-gpu' \ + QISKIT_AER_CUDA_MAJOR=$CUDA_MAJOR \ + QISKIT_ADD_CUDA_REQUIREMENTS=False \ + python3 setup.py bdist_wheel -- \ + -DAER_THRUST_BACKEND=CUDA ... diff --git a/setup.py b/setup.py index 5e9285c208..e98e47fd91 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,13 @@ PACKAGE_NAME = os.getenv("QISKIT_AER_PACKAGE_NAME", "qiskit-aer") CUDA_MAJOR = os.getenv("QISKIT_AER_CUDA_MAJOR", "12") +# Allow build without the CUDA requirements. This is useful in case one intends to use a CUDA that exists in the host system. +ADD_CUDA_REQUIREMENTS = ( + False + if os.getenv("QISKIT_ADD_CUDA_REQUIREMENTS", "true").lower() in ["false", "off", "no"] + else True +) + extras_requirements = {"dask": ["dask", "distributed"]} requirements = [ @@ -38,7 +45,8 @@ "Topic :: Scientific/Engineering", ] -if "gpu" in PACKAGE_NAME: + +if ADD_CUDA_REQUIREMENTS and "gpu" in PACKAGE_NAME: if "11" in CUDA_MAJOR: requirements_cuda = [ "nvidia-cuda-runtime-cu11>=11.8.89", From 1819ffdb0ca70c447cd5635a052c7c3c14fa0bc8 Mon Sep 17 00:00:00 2001 From: Adrian Roman Date: Mon, 4 Sep 2023 05:20:30 +0300 Subject: [PATCH 03/63] =?UTF-8?q?For=20https://github.com/Qiskit/qiskit-ae?= =?UTF-8?q?r/issues/1905=20and=20https://gith=E2=80=A6=20(#1907)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * For https://github.com/Qiskit/qiskit-aer/issues/1905 and https://github.com/Qiskit/qiskit-aer/issues/1906 * formatting * Hopefully clang likes it now * Now with clang run on it, hopefully it's fine. 
* Removed prelude section in release note * Removed prelude section in release notes --- cmake/conan.cmake | 2 ++ ...atest-vc++-compilation-fixes-555601315e7e821b.yaml | 11 +++++++++++ src/transpile/cacheblocking.hpp | 7 ++++--- 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/latest-vc++-compilation-fixes-555601315e7e821b.yaml diff --git a/cmake/conan.cmake b/cmake/conan.cmake index b27dc33b41..e413c3f326 100644 --- a/cmake/conan.cmake +++ b/cmake/conan.cmake @@ -55,6 +55,8 @@ function(_get_msvc_ide_version result) set(${result} 15 PARENT_SCOPE) elseif(NOT MSVC_VERSION VERSION_LESS 1920 AND MSVC_VERSION VERSION_LESS 1930) set(${result} 16 PARENT_SCOPE) + elseif(NOT MSVC_VERSION VERSION_LESS 1930 AND MSVC_VERSION VERSION_LESS 1940) + set(${result} 17 PARENT_SCOPE) else() message(FATAL_ERROR "Conan: Unknown MSVC compiler version [${MSVC_VERSION}]") endif() diff --git a/releasenotes/notes/latest-vc++-compilation-fixes-555601315e7e821b.yaml b/releasenotes/notes/latest-vc++-compilation-fixes-555601315e7e821b.yaml new file mode 100644 index 0000000000..10ca356b1c --- /dev/null +++ b/releasenotes/notes/latest-vc++-compilation-fixes-555601315e7e821b.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Addresses two open issues, one about replacing a char* parameter with a + const char*, another one for adding support for the latest VC++ in + conan.cmake + Changes: + Passing const char* to a char* parameter in CacheBlocking::insert_sim_op call: #1905 + https://github.com/Qiskit/qiskit-aer/issues/1905 + Add support for the latest VC++ in conan.cmake #1906: + https://github.com/Qiskit/qiskit-aer/issues/1906 diff --git a/src/transpile/cacheblocking.hpp b/src/transpile/cacheblocking.hpp index f3aa7e2347..35d72908c0 100644 --- a/src/transpile/cacheblocking.hpp +++ b/src/transpile/cacheblocking.hpp @@ -98,8 +98,9 @@ class CacheBlocking : public CircuitOptimization { void insert_swap(std::vector &ops, uint_t bit0, uint_t bit1, bool chunk) const; - void 
insert_sim_op(std::vector &ops, char *name, + void insert_sim_op(std::vector &ops, const char *name, const reg_t &qubits) const; + void insert_pauli(std::vector &ops, reg_t &qubits, std::string &pauli) const; @@ -192,8 +193,8 @@ void CacheBlocking::insert_swap(std::vector &ops, uint_t bit0, ops.push_back(sgate); } -void CacheBlocking::insert_sim_op(std::vector &ops, char *name, - const reg_t &qubits) const { +void CacheBlocking::insert_sim_op(std::vector &ops, + const char *name, const reg_t &qubits) const { Operations::Op op; op.type = Operations::OpType::sim_op; op.name = name; From d568c6a40d5cecc1c0d6ea653575a377f24e219a Mon Sep 17 00:00:00 2001 From: Samuel F Antao Date: Mon, 4 Sep 2023 08:09:34 +0100 Subject: [PATCH 04/63] Enable ROCm target based on existing CUDA/Thrust implementation. (#1914) * Add Eclipse IDE project files to .gitignore. * Change existing CUDA implementation to reflect a generic GPU. * Add AMD GPU support through ROCm. * Add release node. * Update ROCm release note. * Fix formatting. * Fix formatting. * Cancel .gitignore changes for Eclipse IDE. * Fix missing definition caused by change of header include ordering. * Define enviorment variable to allow Qiskit-Aer to be built without CUDA requirements. * Make vector allocation aligned to 64-bytes as that is the same alignement requirement used by State data. * Fix typos and remove changes going in separate PRs. * Fix lint error and rename release notes file. * Add partial release note to debug docs build. * Fix parsing of release note. * Add release note. * Revert "Add release note." This reverts commit f07234ea39c2b16b02ede905ff6d4970f327cb7d. * Update CONTRIBUTING.md Add ROCm build instructions. 
* Update add-rocm-support-db991e3c2f2ca455.yaml --------- Co-authored-by: Jun Doi --- CMakeLists.txt | 151 ++++++++++++++++++ CONTRIBUTING.md | 60 ++++++- cmake/conan_utils.cmake | 4 +- qiskit_aer/backends/wrappers/CMakeLists.txt | 14 ++ .../add-rocm-support-db991e3c2f2ca455.yaml | 40 +++++ setup.py | 5 +- src/misc/gpu_static_properties.hpp | 9 ++ src/misc/hipify.hpp | 79 +++++++++ src/misc/wrap_thrust.hpp | 5 + src/simulators/extended_stabilizer/gates.hpp | 11 ++ .../statevector/chunk/chunk_container.hpp | 4 + .../chunk/device_chunk_container.hpp | 3 + .../chunk/host_chunk_container.hpp | 4 + .../statevector/chunk/thrust_kernels.hpp | 3 + .../statevector/qubitvector_thrust.hpp | 2 + 15 files changed, 387 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/add-rocm-support-db991e3c2f2ca455.yaml create mode 100644 src/misc/hipify.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ab1560d9df..5ac3951fc7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,34 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR) file(STRINGS "qiskit_aer/VERSION.txt" VERSION_NUM) +# For ROCm builds we need to make sure the CXX and HIP compilers match and are clang. +# We should do this before the project() call to make sure the compiler options are +# properly assessed. +if(AER_THRUST_BACKEND STREQUAL "ROCM") + + if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "$ENV{ROCM_PATH}") + else() + set(ROCM_PATH "/opt/rocm") + endif() + + if(NOT DEFINED CMAKE_HIP_COMPILER) + if(DEFINED ENV{CMAKE_HIP_COMPILER}) + set(CMAKE_HIP_COMPILER "$ENV{CMAKE_HIP_COMPILER}") + else() + set(CMAKE_HIP_COMPILER "${ROCM_PATH}/llvm/bin/clang++") + endif() + endif() + + if(NOT DEFINED CMAKE_CXX_COMPILER) + if(DEFINED ENV{CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER "$ENV{CMAKE_CXX_COMPILER}") + else() + set(CMAKE_CXX_COMPILER "${CMAKE_HIP_COMPILER}") + endif() + endif() +endif() + # Add CUDA to the project if needed. 
set(EXTRA_LANGUAGES "") if(AER_THRUST_BACKEND STREQUAL "CUDA") @@ -376,6 +404,96 @@ if(AER_THRUST_SUPPORTED) set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_THRUST_CPU=TRUE) # We don't need to add OMP because it's already an AER dependency set(THRUST_DEPENDENT_LIBS "") + elseif(AER_THRUST_BACKEND STREQUAL "ROCM") + # + # Build with GPU support with ROCm + # + + # Assert that the C++ compiler is Clang to enable ROCm builds. + if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") + message(FATAL_ERROR + "The compiler for ROCm builds must be Clang. Set CMAKE_CXX_COMPILER to /llvm/bin/clang++") + endif() + + # GDB debug information is what is needed for runs enabled with ROCm. + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ggdb") + + # Leverage AER_ROCM_ARCH to specify the relevant targets and send the ROCm default ones to + # the background by marking them as advanced. We need to set the architectures in advance + # of attempting to find HIP to leverage the package machinery. + + string(REPLACE " " ";" AER_ROCM_ARCH_LIST ${AER_ROCM_ARCH}) + set(GPU_TARGETS ${AER_ROCM_ARCH_LIST} CACHE INTERNAL "GPU targets to compile for") + set(AMDGPU_TARGETS ${AER_ROCM_ARCH_LIST} CACHE INTERNAL "AMD GPU targets to compile for") + set(CMAKE_HIP_ARCHITECTURES ${AER_ROCM_ARCH_LIST}) + + mark_as_advanced(GPU_TARGETS) + mark_as_advanced(AMDGPU_TARGETS) + mark_as_advanced(CMAKE_HIP_ARCHITECTURES) + message(STATUS "ROCm build targeting GPU Architectures: ${GPU_TARGETS}") + + message(STATUS "ROCm assumed path: ${ROCM_PATH}") + list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}) + list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/hip/cmake ${ROCM_PATH}) + + include(CheckLanguage) + check_language(HIP) + + # Find HIP in config mode as the module mode may not provide the hip:: targets. We can use module mode + # if we had hip libraries as they invoke the config package. 
+ find_package(HIP CONFIG) + if(HIP_FOUND) + message(STATUS "Found HIP: " ${HIP_VERSION}) + else() + message(FATAL_ERROR "Could not find HIP.") + endif() + + list(APPEND AER_LIBRARIES hip::device) + + # Add definitions so that dependencies are properly determined. + # TODO: investigate the need for THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA + list(APPEND ROCM_EXTRA_DEFS AER_THRUST_GPU AER_THRUST_ROCM THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) + + # Add -D prefix to all defs as that is what ROCM_EXTRA_* expect to be set to. + list(TRANSFORM ROCM_EXTRA_DEFS PREPEND -D) + add_definitions(${ROCM_EXTRA_DEFS}) + list(APPEND ROCM_EXTRA_FLAGS ${ROCM_EXTRA_DEFS}) + list(APPEND ROCM_EXTRA_FLAGS -isystem${ROCM_PATH}/include; -I${AER_SIMULATOR_CPP_SRC_DIR} ; -isystem${AER_SIMULATOR_CPP_SRC_DIR}/third-party/headers; -ffast-math; -fPIC) + + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND ROCM_EXTRA_FLAGS -O0) + else() + list(APPEND ROCM_EXTRA_FLAGS -O3) + endif() + + if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + list(APPEND ROCM_EXTRA_FLAGS -g; -ggdb) + endif() + + # Add some warning flags to allow existing code to go through with clang. 
+ list(APPEND ROCM_EXTRA_FLAGS -ferror-limit=3 + -Wno-unused-lambda-capture + -Wno-bitwise-instead-of-logical + -Wno-inconsistent-missing-override + -Wno-cast-align + -Wno-float-equal + -Wno-unused-variable + -Wno-unused-but-set-variable + -Wno-switch + -Wno-writable-strings + -Wno-shadow + -Wno-delete-non-abstract-non-virtual-dtor + -Wno-pessimizing-move + -Wno-return-type-c-linkage + -Wno-overloaded-virtual + -Wno-braced-scalar-init) + + + if(AER_ENABLE_CUQUANTUM) + message(WARNING "Implementation of cuQuantum is not available for ROCm builds.") + endif() + else() message(STATUS "No Thrust supported backend") set(AER_THRUST_SUPPORTED FALSE) @@ -463,6 +581,35 @@ else() # Standalone build RUNTIME_OUTPUT_DIRECTORY_DEBUG Debug RUNTIME_OUTPUT_DIRECTORY_RELEASE Release) endfunction() + + function(build_rocm target src_file is_exec) + # ROCm is only supported in x86_64 devices so it should be safe to leverage AVX2. + set(SIMD_SOURCE_FILE "${PROJECT_SOURCE_DIR}/src/simulators/statevector/qv_avx2.cpp") + + set_source_files_properties( + ${SIMD_SOURCE_FILE} + ${src_file} + PROPERTIES LANGUAGE CXX) + + if(${is_exec}) + add_executable(${target} ${src_file} ${SIMD_SOURCE_FILE}) + else() + add_library(${target} ${src_file} ${SIMD_SOURCE_FILE}) + endif() + + target_compile_options(${target} PRIVATE ${ROCM_EXTRA_FLAGS} ${SIMD_FLAGS_LIST}) + target_compile_definitions(${target} PRIVATE ${ROCM_EXTRA_DEFS} ${AER_COMPILER_DEFINITIONS}) + + target_link_libraries(${target} PRIVATE ${AER_LIBRARIES}) + + set_target_properties(${target} PROPERTIES + LINKER_LANGUAGE CXX + CXX_STANDARD 14 + COMPILE_FLAGS ${AER_COMPILER_FLAGS} + LINK_FLAGS ${AER_LINKER_FLAGS} + RUNTIME_OUTPUT_DIRECTORY_DEBUG Debug + RUNTIME_OUTPUT_DIRECTORY_RELEASE Release) + endfunction() function(build_cpu target src_file is_exec) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") @@ -506,6 +653,8 @@ else() # Standalone build 
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) if(CUDA_FOUND AND AER_THRUST_BACKEND STREQUAL "CUDA") build_cuda(qasm_simulator ${AER_SIMULATOR_SOURCE} TRUE) + elseif(HIP_FOUND AND AER_THRUST_BACKEND STREQUAL "ROCM") + build_rocm(qasm_simulator ${AER_SIMULATOR_SOURCE} TRUE) else() build_cpu(qasm_simulator ${AER_SIMULATOR_SOURCE} TRUE) endif() @@ -516,6 +665,8 @@ else() # Standalone build set(AER_RUNTIME_SOURCE "${PROJECT_SOURCE_DIR}/contrib/runtime/aer_runtime.cpp") if(CUDA_FOUND AND AER_THRUST_BACKEND STREQUAL "CUDA") build_cuda(aer ${AER_RUNTIME_SOURCE} FALSE) + elseif(HIP_FOUND AND AER_THRUST_BACKEND STREQUAL "ROCM") + build_rocm(aer ${AER_RUNTIME_SOURCE} FALSE) else() build_cpu(aer ${AER_RUNTIME_SOURCE} FALSE) endif() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5e11aa1bc1..152affcccc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -636,9 +636,11 @@ options we have on `Aer` to CMake, we use its native mechanism: ### Building with GPU support Qiskit Aer can exploit GPU's horsepower to accelerate some simulations, specially the larger ones. -GPU access is supported via CUDA® (NVIDIA® chipset), so to build with GPU support, you need -to have CUDA® >= 11.2 preinstalled. See install instructions [here](https://developer.nvidia.com/cuda-toolkit-archive) -Please note that we only support GPU acceleration on Linux platforms at the moment. +GPU access is supported either via CUDA® (NVIDIA® chipset) or ROCm® (AMD® GPUs). + +#### Building with CUDA® support +To build with CUDA® support, you need to have CUDA® >= 11.2 preinstalled. See install instructions [here](https://developer.nvidia.com/cuda-toolkit-archive). +Please note that we only support CUDA® GPU acceleration on Linux platforms at the moment. 
Once CUDA® is properly installed, you only need to set a flag so the build system knows what to do: @@ -664,7 +666,7 @@ or This will reduce the amount of compilation time when, for example, the architecture auto detection fails and the build system compiles all common architectures. -Few notes on GPU builds: +Few notes on CUDA® GPU builds: 1. Building takes considerable more time than non-GPU build, so be patient :) 2. CUDA® >= 11.2 imposes the restriction of building with g++ version not newer than 8 3. We don't need NVIDIA® drivers for building, but we need them for running simulations @@ -706,8 +708,58 @@ Also you can accelrate density matrix and unitary matrix simulations as well. sim = AerSimulator(method='density_matrix', device='GPU') results = execute(circuit,sim,cuStateVec_enable=True).result() ``` +#### Building with ROCm® support +ROCm® support has been added matching the CUDA® implementation based +on the `thrust` library. This enables Qiskit-Aer to run on AMD® GPUs, +including the AMD® Instinct GPU line based on the CDNA architecture. +ROCm® is only supported on Linux platforms. + +To build the standalone version, the following should be sufficient: + +``` +cmake -G Ninja \ + -DCMAKE_INSTALL_PREFIX= \ + -DSKBUILD=FALSE \ + -DAER_THRUST_BACKEND=ROCM \ + -DAER_MPI= \ + -DAER_ROCM_ARCH= \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_TESTS=True +ninja install +``` +Alternatively, and possibly preferred for most use cases, you can create a Python +wheel file that you can install as part of your Python environment: +``` +cd +QISKIT_AER_PACKAGE_NAME='qiskit-aer-gpu-rocm' \ + python3 setup.py bdist_wheel -- \ + -DAER_THRUST_BACKEND=ROCM \ + -DAER_MPI= \ + -DAER_ROCM_ARCH= +pip install --force-reinstall dist/qiskit_aer_gpu_rocm-*.whl +``` + +In both cases, the host system needs to have a functional ROCm® installation and +the environment variable `ROCM_PATH` set pointing to the ROCm® installation folder if +that is not the default `/opt/rocm`. 
+Depending on how your Python environment is set, you might need to install +Qiskit-Aer's required development modules: +``` +cd +pip install -r requirements-dev.txt +``` + +To leverage the ROCm® implementations no code changes are needed on top of what one +already does for CUDA®. Running with cuStateVec, for instance, requires setting the +`device='GPU'` AerSimulator option and the `cuStateVec_enable=True` option, +similarly to what is done for CUDA®: + +``` +sim = AerSimulator(method='statevector', device='GPU') +results = execute(circuit,sim,cuStateVec_enable=True).result() +``` ### Building with MPI support diff --git a/cmake/conan_utils.cmake b/cmake/conan_utils.cmake index 93c1c4220b..8d4d252f02 100644 --- a/cmake/conan_utils.cmake +++ b/cmake/conan_utils.cmake @@ -43,7 +43,7 @@ macro(setup_conan) endif() endif() - if(AER_THRUST_BACKEND AND NOT AER_THRUST_BACKEND STREQUAL "CUDA") + if(AER_THRUST_BACKEND AND NOT AER_THRUST_BACKEND STREQUAL "CUDA" AND NOT AER_THRUST_BACKEND STREQUAL "ROCM") set(REQUIREMENTS ${REQUIREMENTS} thrust/1.9.5) list(APPEND AER_CONAN_LIBS thrust) string(TOLOWER ${AER_THRUST_BACKEND} THRUST_BACKEND) @@ -78,7 +78,7 @@ macro(setup_conan) endif() # Headers includes - if(AER_THRUST_BACKEND AND NOT AER_THRUST_BACKEND STREQUAL "CUDA") + if(AER_THRUST_BACKEND AND NOT AER_THRUST_BACKEND STREQUAL "CUDA" AND NOT AER_THRUST_BACKEND STREQUAL "ROCM") set(AER_SIMULATOR_CPP_EXTERNAL_LIBS ${AER_SIMULATOR_CPP_EXTERNAL_LIBS} ${CONAN_INCLUDE_DIRS_THRUST}) endif() diff --git a/qiskit_aer/backends/wrappers/CMakeLists.txt b/qiskit_aer/backends/wrappers/CMakeLists.txt index 0430fc42d2..c20917fc9e 100644 --- a/qiskit_aer/backends/wrappers/CMakeLists.txt +++ b/qiskit_aer/backends/wrappers/CMakeLists.txt @@ -38,6 +38,20 @@ if(AER_THRUST_BACKEND STREQUAL "CUDA") nvcc_add_compiler_options(${AER_COMPILER_FLAGS_STRIPPED} AER_COMPILER_FLAGS_OUT) set_target_properties(controller_wrappers PROPERTIES COMPILE_FLAGS "${AER_COMPILER_FLAGS_OUT}") enable_language(CUDA) 
+elseif(AER_THRUST_BACKEND STREQUAL "ROCM") + + if(NOT DEFINED SIMD_SOURCE_FILE) + message(FATAL_ERROR "ROCm supported target machines are expected to be SIMD-enabled.") + endif() + + set_source_files_properties( + bindings.cc + ${SIMD_SOURCE_FILE} + PROPERTIES LANGUAGE CXX) + + target_compile_options(controller_wrappers PRIVATE ${ROCM_EXTRA_FLAGS} ${SIMD_FLAGS_LIST}) + target_compile_definitions(controller_wrappers PRIVATE ${ROCM_EXTRA_DEFS} ${AER_COMPILER_DEFINITIONS}) + set_target_properties(controller_wrappers PROPERTIES COMPILE_FLAGS "${AER_COMPILER_FLAGS}") else() if(DEFINED SIMD_SOURCE_FILE) string(REPLACE ";" " " SIMD_FLAGS "${SIMD_FLAGS_LIST}") diff --git a/releasenotes/notes/add-rocm-support-db991e3c2f2ca455.yaml b/releasenotes/notes/add-rocm-support-db991e3c2f2ca455.yaml new file mode 100644 index 0000000000..621c2ce60c --- /dev/null +++ b/releasenotes/notes/add-rocm-support-db991e3c2f2ca455.yaml @@ -0,0 +1,40 @@ +--- +features: + - | + ROCm support has been added matching the existing CUDA implementation based + on the ``thrust`` library. This enables Qiskit-Aer to run on AMD GPUs, + including the AMD Instinct GPU line based on the CDNA architecture. 
To build + the standalone version, the following should be sufficient:: + + cmake -G Ninja \ + -DCMAKE_INSTALL_PREFIX= \ + -DSKBUILD=FALSE \ + -DAER_THRUST_BACKEND=ROCM \ + -DAER_MPI= \ + -DAER_ROCM_ARCH= \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_TESTS=True + ninja install + + Alternatively, and possibly preferred for most use cases, you can create a Python + wheel file that you can install as part of your Python environment:: + + cd + + QISKIT_AER_PACKAGE_NAME='qiskit-aer-gpu-rocm' \ + python3 setup.py bdist_wheel -- \ + -DAER_THRUST_BACKEND=ROCM \ + -DAER_MPI= \ + -DAER_ROCM_ARCH= + + pip install --force-reinstall dist/qiskit_aer_gpu_rocm-*.whl + + In both cases, the host system needs to have a functional ROCm installation and + the environment variable ``ROCM_PATH`` set pointing to the ROCm installation folder if + that is not the default ``/opt/rocm``. + Depending on how your Python environment is set, you might need to install + Qiskit-Aer's required development modules:: + + cd + pip install -r requirements-dev.txt + diff --git a/setup.py b/setup.py index e98e47fd91..ea37a0c857 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,10 @@ ] -if ADD_CUDA_REQUIREMENTS and "gpu" in PACKAGE_NAME: +# ROCm is expected to be available in the target system to enable CDNA GPUs, so no +# requirements need to be loaded. Also, no ROCm related classifiers are in place that +# could be used here. +if ADD_CUDA_REQUIREMENTS and "gpu" in PACKAGE_NAME and "rocm" not in PACKAGE_NAME: if "11" in CUDA_MAJOR: requirements_cuda = [ "nvidia-cuda-runtime-cu11>=11.8.89", diff --git a/src/misc/gpu_static_properties.hpp b/src/misc/gpu_static_properties.hpp index 5730797cf2..4fabb5957e 100644 --- a/src/misc/gpu_static_properties.hpp +++ b/src/misc/gpu_static_properties.hpp @@ -14,6 +14,15 @@ #ifndef __GPU_STATIC_PRIORITIES_H__ #define __GPU_STATIC_PRIORITIES_H__ +#ifdef AER_THRUST_ROCM +#include +// In ROCm warpSize is a constexpr so the operations it is part of can be +// optimized as such. 
+#define _WS warpSize +// Maximum number of threads in a block. +#define _MAX_THD 1024 +#endif // AER_THRUST_ROCM + #ifdef AER_THRUST_CUDA // In CUDA warpSize could not be a compile-time constant so we use 32 directly. #define _WS 32 diff --git a/src/misc/hipify.hpp b/src/misc/hipify.hpp new file mode 100644 index 0000000000..1c675a229f --- /dev/null +++ b/src/misc/hipify.hpp @@ -0,0 +1,79 @@ +/** + * This code is part of Qiskit. + * + * (C) Copyright AMD 2023. + * + * This code is licensed under the Apache License, Version 2.0. You may + * obtain a copy of this license in the LICENSE.txt file in the root directory + * of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. + * + * Any modifications or derivative works of this code must retain this + * copyright notice, and modified files need to carry a notice indicating + * that they have been altered from the originals. + */ +#ifndef __HIPIFY_H__ +#define __HIPIFY_H__ + +#include "misc/gpu_static_properties.hpp" + +// Define an equivalent for __shfl*_sync. This assumes that all threads +// in the wavefront are active, i.e. the mask is all ones. + +template +__device__ T __shfl_xor_aux(T var, int laneMask) { + // Assert based on the values that make sense in CUDA. + static_assert(mask == 0xffffffff, + "Shuffle XOR implementation assumes all wavefront is active."); + static_assert(width == 32, + "Shuffle XOR implementation assumes on the whole wavefront."); + // In AMDGCN all wavefront intrinsics are synchronous. + return __shfl_xor(var, laneMask, _WS); +} +#define __shfl_xor_sync(mask, var, laneMask, width) \ + __shfl_xor_aux(var, laneMask); + +template +__device__ T __shfl_aux(T var, int lane) { + // Assert based on the values that make sense in CUDA. + static_assert(mask == 0xffffffff, + "Shuffle implementation assumes all wavefront is active."); + static_assert(width == 32, + "Shuffle implementation assumes on the whole wavefront."); + // In AMDGCN all wavefront intrinsics are synchronous. 
+ return __shfl(var, lane, _WS); +} +#define __shfl_sync(mask, var, lane, width) __shfl_aux(var, lane); + +// +// HIP types +// +#define cudaDataType hipDataType +#define cudaDeviceCanAccessPeer hipDeviceCanAccessPeer +#define cudaDeviceEnablePeerAccess hipDeviceEnablePeerAccess +#define cudaDeviceGetAttribute hipDeviceGetAttribute +#define cudaError_t hipError_t +#define cudaFree hipFree +#define cudaGetDevice hipGetDevice +#define cudaGetDeviceCount hipGetDeviceCount +#define cudaGetErrorName hipGetErrorName +#define cudaGetErrorString hipGetErrorString +#define cudaGetLastError hipGetLastError +#define cudaMalloc hipMalloc +#define cudaMemcpy hipMemcpy +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemcpyPeerAsync hipMemcpyPeerAsync +#define cudaMemsetAsync hipMemsetAsync +#define cudaMemGetInfo hipMemGetInfo +#define cudaSetDevice hipSetDevice +#define cudaStreamCreate hipStreamCreate +#define cudaStreamCreateWithFlags hipStreamCreateWithFlags +#define cudaStreamDestroy hipStreamDestroy +#define cudaStreamNonBlocking hipStreamNonBlocking +#define cudaStreamSynchronize hipStreamSynchronize +#define cudaStream_t hipStream_t +#define cudaSuccess hipSuccess + +#endif //__HIPIFY_H__ diff --git a/src/misc/wrap_thrust.hpp b/src/misc/wrap_thrust.hpp index b02122c73c..df4ab7a03f 100644 --- a/src/misc/wrap_thrust.hpp +++ b/src/misc/wrap_thrust.hpp @@ -43,7 +43,12 @@ DISABLE_WARNING_PUSH #endif #include +// We can't mix OpenMP and in device-side builds when ROCm is enabled. 
+#if defined(AER_THRUST_ROCM) && defined(__HIP_DEVICE_COMPILE__) +#define AER_THRUST_ROCM_DISABLE_THRUST_OMP +#else #include +#endif DISABLE_WARNING_POP #endif // inclusion guard diff --git a/src/simulators/extended_stabilizer/gates.hpp b/src/simulators/extended_stabilizer/gates.hpp index c08ee91dea..3df76b37eb 100644 --- a/src/simulators/extended_stabilizer/gates.hpp +++ b/src/simulators/extended_stabilizer/gates.hpp @@ -24,6 +24,17 @@ #include "framework/operations.hpp" #include "framework/types.hpp" +// In ROCm builds, device-side implementation of pow is a template overload +// whereas host-side is a template. This means that the device always takes +// precedence which causes issues compiling the pow constexpr. Therefore we +// create a template overload here as well. +// TODO: remove when fixed in clang (https://reviews.llvm.org/D158247). +#ifdef AER_THRUST_ROCM +namespace std { +constexpr double pow(double x, int y) { return std::pow(x, y); } +} // namespace std +#endif + namespace CHSimulator { using uint_t = uint_fast64_t; using complex_t = std::complex; diff --git a/src/simulators/statevector/chunk/chunk_container.hpp b/src/simulators/statevector/chunk/chunk_container.hpp index 6afa8e09f4..029f9a039c 100644 --- a/src/simulators/statevector/chunk/chunk_container.hpp +++ b/src/simulators/statevector/chunk/chunk_container.hpp @@ -21,6 +21,10 @@ DISABLE_WARNING_PUSH #include #include #endif +#ifdef AER_THRUST_ROCM +#include "misc/hipify.hpp" +#include +#endif DISABLE_WARNING_POP #include "misc/wrap_thrust.hpp" diff --git a/src/simulators/statevector/chunk/device_chunk_container.hpp b/src/simulators/statevector/chunk/device_chunk_container.hpp index 6098fb613b..6ae1ac9950 100644 --- a/src/simulators/statevector/chunk/device_chunk_container.hpp +++ b/src/simulators/statevector/chunk/device_chunk_container.hpp @@ -23,6 +23,9 @@ #ifdef AER_THRUST_CUDA namespace thrust_gpu = thrust::cuda; #endif +#ifdef AER_THRUST_ROCM +namespace thrust_gpu = thrust::hip; +#endif 
namespace AER { namespace QV { diff --git a/src/simulators/statevector/chunk/host_chunk_container.hpp b/src/simulators/statevector/chunk/host_chunk_container.hpp index 9e95316fd2..092c49490b 100644 --- a/src/simulators/statevector/chunk/host_chunk_container.hpp +++ b/src/simulators/statevector/chunk/host_chunk_container.hpp @@ -243,8 +243,10 @@ void HostChunkContainer::Swap(Chunk &src, uint_t iChunk, template void HostChunkContainer::Zero(uint_t iChunk, uint_t count) { +#ifndef AER_THRUST_ROCM_DISABLE_THRUST_OMP thrust::fill_n(thrust::omp::par, data_.begin() + (iChunk << this->chunk_bits_), count, 0.0); +#endif } template @@ -259,6 +261,7 @@ reg_t HostChunkContainer::sample_measure( strided_range *> iter( chunk_pointer(iChunk), chunk_pointer(iChunk + count), stride); +#ifndef AER_THRUST_ROCM_DISABLE_THRUST_OMP if (dot) thrust::transform_inclusive_scan(thrust::omp::par, iter.begin(), iter.end(), iter.begin(), complex_dot_scan(), @@ -270,6 +273,7 @@ reg_t HostChunkContainer::sample_measure( thrust::lower_bound(thrust::omp::par, iter.begin(), iter.end(), rnds.begin(), rnds.begin() + SHOTS, vSmp.begin(), complex_less()); +#endif for (i = 0; i < SHOTS; i++) { samples[i] = vSmp[i]; diff --git a/src/simulators/statevector/chunk/thrust_kernels.hpp b/src/simulators/statevector/chunk/thrust_kernels.hpp index 2c27acae13..360181272f 100644 --- a/src/simulators/statevector/chunk/thrust_kernels.hpp +++ b/src/simulators/statevector/chunk/thrust_kernels.hpp @@ -21,6 +21,9 @@ DISABLE_WARNING_PUSH #include #include #endif +#ifdef AER_THRUST_ROCM +#include +#endif DISABLE_WARNING_POP #include "misc/wrap_thrust.hpp" diff --git a/src/simulators/statevector/qubitvector_thrust.hpp b/src/simulators/statevector/qubitvector_thrust.hpp index 57f09d9bee..31431ae8bf 100644 --- a/src/simulators/statevector/qubitvector_thrust.hpp +++ b/src/simulators/statevector/qubitvector_thrust.hpp @@ -1027,9 +1027,11 @@ std::complex QubitVectorThrust::inner_product() const { if (strm) dot = 
thrust::inner_product(thrust::device, vec0, vec0 + data_size_ * 2, vec1, 0.0); +#ifndef AER_THRUST_ROCM_DISABLE_THRUST_OMP else dot = thrust::inner_product(thrust::omp::par, vec0, vec0 + data_size_ * 2, vec1, 0.0); +#endif #else if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) dot = thrust::inner_product(thrust::device, vec0, vec0 + data_size_ * 2, From 6460e4935fc2ad9c026102a6af14df5e89eb2f3b Mon Sep 17 00:00:00 2001 From: Adrian Roman Date: Wed, 6 Sep 2023 04:04:18 +0300 Subject: [PATCH 05/63] =?UTF-8?q?Fix=20for=20https://github.com/Qiskit/qis?= =?UTF-8?q?kit-aer/issues/1925=20(Aer=20runtime=E2=80=A6=20(#1926)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix for https://github.com/Qiskit/qiskit-aer/issues/1925 (Aer runtime api (from contrib) exposes the wrong sx operation) * Changed as advised in https://github.com/Qiskit/qiskit-aer/pull/1926#pullrequestreview-1610165616 --- contrib/runtime/aer_runtime.cpp | 2 +- ...pi-exposed-wrong-sx-op-dadae6cf0787e169.yaml | 9 +++++++++ src/controllers/state_controller.hpp | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/aer-runtime-api-exposed-wrong-sx-op-dadae6cf0787e169.yaml diff --git a/contrib/runtime/aer_runtime.cpp b/contrib/runtime/aer_runtime.cpp index 784a626d51..6961bac30e 100644 --- a/contrib/runtime/aer_runtime.cpp +++ b/contrib/runtime/aer_runtime.cpp @@ -139,7 +139,7 @@ void aer_apply_tdg(void *handler, uint_t qubit) { // sqrt(NOT) gate void aer_apply_sx(void *handler, uint_t qubit) { AER::AerState *state = reinterpret_cast(handler); - state->apply_mcrx({qubit}, -M_PI / 4.0); + state->apply_mcsx({qubit}); }; // Rotation around X-axis diff --git a/releasenotes/notes/aer-runtime-api-exposed-wrong-sx-op-dadae6cf0787e169.yaml b/releasenotes/notes/aer-runtime-api-exposed-wrong-sx-op-dadae6cf0787e169.yaml new file mode 100644 index 0000000000..dfa6aa127c --- /dev/null +++ 
b/releasenotes/notes/aer-runtime-api-exposed-wrong-sx-op-dadae6cf0787e169.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Aer runtime api (from contrib) exposed the wrong sx operation, + implemented with a rx. The implementation is changed now by + adding AerState::apply_mcsx and calling it from aer_apply_sx. + This way the api has the same behavior as the documentation + states and also how the sx gate behaves in python. + Fix for: https://github.com/Qiskit/qiskit-aer/issues/1925 diff --git a/src/controllers/state_controller.hpp b/src/controllers/state_controller.hpp index 8b3cdf30cd..62d316b6e8 100644 --- a/src/controllers/state_controller.hpp +++ b/src/controllers/state_controller.hpp @@ -310,6 +310,12 @@ class AerState { // If N=3 this implements an optimized CCRZ gate virtual void apply_mcrz(const reg_t &qubits, const double theta); + // Apply a general N-qubit multi-controlled SX-gate + // If N=1 this implements an optimized SX gate + // If N=2 this implements an optimized CSX gate + // If N=3 this implements an optimized CCSX gate + virtual void apply_mcsx(const reg_t &qubits); + //----------------------------------------------------------------------- // Apply Non-Unitary Gates //----------------------------------------------------------------------- @@ -1292,6 +1298,17 @@ void AerState::apply_mcrz(const reg_t &qubits, const double theta) { buffer_op(std::move(op)); } +void AerState::apply_mcsx(const reg_t &qubits) { + assert_initialized(); + + Operations::Op op; + op.type = Operations::OpType::gate; + op.qubits = qubits; + op.name = "mcsx"; + + buffer_op(std::move(op)); +} + //----------------------------------------------------------------------- // Apply Non-Unitary Gates //----------------------------------------------------------------------- From 4fb99dddf9e692d08ff35f814acec3b7c13558f5 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Thu, 7 Sep 2023 14:00:49 +0900 Subject: [PATCH 06/63] Fix measure in stabilizer method (#1895) * Fix measure in stabilizer * add 
test case for measuring stabilizer --------- Co-authored-by: Hiroshi Horii --- ...x_stabilizer_measure-49f9b4261dfaa4d3.yaml | 4 + src/simulators/stabilizer/clifford.hpp | 141 +++++++++--------- .../backends/aer_simulator/test_measure.py | 63 ++++++++ 3 files changed, 137 insertions(+), 71 deletions(-) create mode 100644 releasenotes/notes/fix_stabilizer_measure-49f9b4261dfaa4d3.yaml diff --git a/releasenotes/notes/fix_stabilizer_measure-49f9b4261dfaa4d3.yaml b/releasenotes/notes/fix_stabilizer_measure-49f9b4261dfaa4d3.yaml new file mode 100644 index 0000000000..6a0132329c --- /dev/null +++ b/releasenotes/notes/fix_stabilizer_measure-49f9b4261dfaa4d3.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + This release fixes an issue in measurement function of stabilizer simulator diff --git a/src/simulators/stabilizer/clifford.hpp b/src/simulators/stabilizer/clifford.hpp index e54844e573..1de95089ce 100644 --- a/src/simulators/stabilizer/clifford.hpp +++ b/src/simulators/stabilizer/clifford.hpp @@ -422,7 +422,6 @@ bool Clifford::measure_and_update(const uint64_t qubit, auto anticom = z_anticommuting(qubit); int nid = omp_get_num_threads(); - if (anticom.first) { bool outcome = (randint == 1); auto row = anticom.second; @@ -515,90 +514,86 @@ bool Clifford::measure_and_update(const uint64_t qubit, return outcome; } else { // Deterministic outcome - bool outcome = false; + uint_t outcome = 0; Pauli::Pauli accum(num_qubits_); - uint64_t blocks = destabilizer_phases_.blockLength(); + uint_t blocks = destabilizer_phases_.blockLength(); if (blocks < 2) { - for (uint64_t i = 0; i < num_qubits_; i++) { - if (destabilizer_table_[qubit].X[i]) { - bool b0 = false, b1 = false; - for (size_t q = 0; q < num_qubits_; q++) { - bool t0, t1, add; - bool accumX = accum.X[q]; - bool accumZ = accum.Z[q]; - - t0 = accumX & stabilizer_table_[q].Z[i]; - t1 = accumZ ^ stabilizer_table_[q].X[i]; - - b1 ^= (t0 & b0); - b0 ^= t0; - b1 ^= (t0 & t1); - - t0 = stabilizer_table_[q].X[i] & accumZ; - t1 = 
stabilizer_table_[q].Z[i] ^ accumX; - t1 ^= t0; - - b1 ^= (t0 & b0); - b0 ^= t0; - b1 ^= (t0 & t1); - - accum.X.setValue(stabilizer_table_[q].X[i] ^ accum.X[q], q); - accum.Z.setValue(stabilizer_table_[q].Z[i] ^ accum.Z[q], q); - } - b1 ^= (stabilizer_phases_[i] ^ outcome); - - if (b0) { - throw std::runtime_error("Clifford: rowsum error"); - } - outcome = b1; + for (uint_t ib = 0; ib < blocks; ib++) { + uint_t destabilizer_mask = destabilizer_table_[qubit].X(ib); + uint_t exponent_l = 0ull; + uint_t exponent_h = 0ull; + + for (uint_t q = 0; q < num_qubits_; q++) { + uint_t tl, th, add; + uint_t accumX = 0ull - (uint_t)accum.X[q]; + uint_t accumZ = 0ull - (uint_t)accum.Z[q]; + + tl = accumX & stabilizer_table_[q].Z(ib); + th = accumZ ^ stabilizer_table_[q].X(ib); + + add = tl & exponent_l; + exponent_l ^= tl; + exponent_h ^= add; + exponent_h ^= (tl & th); + + tl = stabilizer_table_[q].X(ib) & accumZ; + th = stabilizer_table_[q].Z(ib) ^ accumX; + th ^= tl; + + add = tl & exponent_l; + exponent_l ^= tl; + exponent_h ^= add; + exponent_h ^= (tl & th); + + add = stabilizer_table_[q].X(ib) & destabilizer_mask; + accumX &= AER::Utils::popcount(add) & 1; + add = stabilizer_table_[q].Z(ib) & destabilizer_mask; + accumZ &= AER::Utils::popcount(add) & 1; + + accum.X.setValue((bool)accumX, q); + accum.Z.setValue((bool)accumZ, q); + } + exponent_h ^= stabilizer_phases_(ib); + outcome ^= (exponent_h & destabilizer_mask); + + if ((exponent_l & destabilizer_mask) != 0) { + throw std::runtime_error("Clifford: rowsum error"); } } } else { - uint64_t blockSize = destabilizer_phases_.blockSize(); + uint_t blockSize = destabilizer_phases_.blockSize(); // loop for cache blocking - for (uint64_t ii = 0; ii < blocks; ii++) { - uint64_t destabilizer_mask = destabilizer_table_[qubit].X(ii); + for (uint_t ii = 0; ii < blocks; ii++) { + uint_t destabilizer_mask = destabilizer_table_[qubit].X(ii); if (destabilizer_mask == 0) continue; - uint64_t exponent_l = 0; - uint64_t exponent_lc = 0; - 
uint64_t exponent_h = 0; + uint_t exponent_l = 0; + uint_t exponent_lc = 0; + uint_t exponent_h = 0; auto measure_determinisitic_func = [this, &accum, &exponent_l, &exponent_lc, &exponent_h, blocks, blockSize, destabilizer_mask, ii](AER::int_t qq) { - uint64_t qs = qq * blockSize; - uint64_t qe = qs + blockSize; + uint_t qs = qq * blockSize; + uint_t qe = qs + blockSize; if (qe > num_qubits_) qe = num_qubits_; - uint64_t local_exponent_l = 0; - uint64_t local_exponent_h = 0; - - for (uint64_t q = qs; q < qe; q++) { - uint64_t sX = stabilizer_table_[q].X(ii); - uint64_t sZ = stabilizer_table_[q].Z(ii); + uint_t local_exponent_l = 0; + uint_t local_exponent_h = 0; - // set accum for this block - uint64_t accumX = destabilizer_mask & sX; - uint64_t accumZ = destabilizer_mask & sZ; - for (int b = 1; b < blockSize; b *= 2) { - accumX ^= (accumX << b); - accumZ ^= (accumZ << b); - } - accumX ^= (0ull - (uint64_t)accum.X[q]); - accumZ ^= (0ull - (uint64_t)accum.Z[q]); - accum.X.setValue((accumX >> (blockSize - 1)), q); - accum.Z.setValue((accumZ >> (blockSize - 1)), q); + for (uint_t q = qs; q < qe; q++) { + uint_t sX = stabilizer_table_[q].X(ii); + uint_t sZ = stabilizer_table_[q].Z(ii); - accumX ^= sX; - accumZ ^= sZ; + uint_t accumX = (0ull - (uint_t)accum.X[q]); + uint_t accumZ = (0ull - (uint_t)accum.Z[q]); // exponents for this block - uint64_t t0, t1; + uint_t t0, t1; t0 = accumX & sZ; t1 = accumZ ^ sX; @@ -614,6 +609,12 @@ bool Clifford::measure_and_update(const uint64_t qubit, local_exponent_h ^= (t0 & local_exponent_l); local_exponent_l ^= t0; local_exponent_h ^= (t0 & t1); + + // update accum + accumX &= AER::Utils::popcount(sX & destabilizer_mask) & 1; + accum.X.setValue((accumX != 0), q); + accumZ &= AER::Utils::popcount(sZ & destabilizer_mask) & 1; + accum.Z.setValue((accumZ != 0), q); } #pragma omp atomic @@ -627,16 +628,14 @@ bool Clifford::measure_and_update(const uint64_t qubit, (num_qubits_ > omp_threshold_ && omp_threads_ > 1 && nid == 1), 0, blocks, 
measure_determinisitic_func, omp_threads_); - exponent_h ^= - (exponent_lc ^ - exponent_l); // if exponent_l is 0 and any of local_exponent_l is - // 1, then flip exponent_h - - exponent_h ^= (stabilizer_phases_(ii) & destabilizer_mask); - outcome ^= ((AER::Utils::popcount(exponent_h) & 1) != 0); + // if exponent_l is 0 and any of local_exponent_l is + // 1, then flip exponent_h + exponent_h ^= (exponent_lc ^ exponent_l); + exponent_h ^= stabilizer_phases_(ii); + outcome ^= (exponent_h & destabilizer_mask); } } - return outcome; + return ((AER::Utils::popcount(outcome) & 1) != 0); } } diff --git a/test/terra/backends/aer_simulator/test_measure.py b/test/terra/backends/aer_simulator/test_measure.py index c705869817..fd39d68042 100644 --- a/test/terra/backends/aer_simulator/test_measure.py +++ b/test/terra/backends/aer_simulator/test_measure.py @@ -23,6 +23,7 @@ from qiskit.circuit.library import QuantumVolume from qiskit.quantum_info.random import random_unitary from test.terra.backends.simulator_test_case import SimulatorTestCase, supported_methods +import numpy as np SUPPORTED_METHODS = [ "automatic", @@ -199,6 +200,68 @@ def test_measure_nondeterministic_multi_qubit_without_sampling(self, method, dev self.compare_counts(result, circuits, targets, delta=delta * shots) self.compare_result_metadata(result, circuits, "measure_sampling", False) + # --------------------------------------------------------------------- + # Test stabilizer measure + # --------------------------------------------------------------------- + @supported_methods(["stabilizer"]) + def test_measure_stablizer_64bit(self, method, device): + backend = self.backend(method=method, device=device) + shots = 10000 + delta = 0.05 + circ = QuantumCircuit(65, 32) + + circ.reset(0) + for i in range(0, 30, 6): + circ.h(i) + circ.h(i + 4) + circ.h(30) + circ.h(31) + + for i in range(1, 32, 2): + circ.cx(i + 32, i) + for i in range(0, 30, 6): + circ.cx(i, i + 32) + circ.cx(i + 4, i + 36) + circ.cx(30, 62) + + 
for i in range(1, 30, 2): + circ.cx(i + 35, i) + for i in range(4, 32, 4): + circ.cx(i, i + 29) + + for i in range(0, 30, 2): + circ.cx(i + 35, i) + for i in range(1, 30, 6): + circ.cx(i, i + 33) + circ.cx(i + 2, i + 35) + circ.cx(31, 64) + + for i in range(0, 32): + circ.measure(i, i) + result = backend.run(circ, shots=shots).result() + counts = result.get_counts() + self.assertSuccess(result) + + n_anc = 32 + totals = np.zeros(n_anc, dtype=int) + for outcomes, num_counts in counts.items(): + new_totals = num_counts * np.array([int(bit) for bit in outcomes][::-1]) + assert len(new_totals) == n_anc + totals += new_totals + output = {} + for i in range(0, 32): + output[hex(i)] = totals[i] + + targets = {} + for i in range(0, 30, 3): + targets[hex(i)] = shots / 2 + targets[hex(i + 1)] = shots / 2 + targets[hex(i + 2)] = 0 + targets[hex(30)] = shots / 2 + targets[hex(31)] = shots / 2 + + self.assertDictAlmostEqual(output, targets, delta=delta * shots) + # --------------------------------------------------------------------- # Test MPS algorithms for measure # --------------------------------------------------------------------- From 5e77fc83dde114aba7e4f27d6d0247d6081bbb58 Mon Sep 17 00:00:00 2001 From: Adrian Roman Date: Thu, 7 Sep 2023 16:58:45 +0300 Subject: [PATCH 07/63] Fix for https://github.com/Qiskit/qiskit-aer/issues/1918 (#1922) * Fix for https://github.com/Qiskit/qiskit-aer/issues/1918 * Removed prelude section in release note --- ...in-release-with-vc++-47500a37841cfaa8.yaml | 8 +++++++ src/simulators/statevector/qv_avx2.cpp | 22 +++++-------------- 2 files changed, 14 insertions(+), 16 deletions(-) create mode 100644 releasenotes/notes/fix-compiling-issue-in-release-with-vc++-47500a37841cfaa8.yaml diff --git a/releasenotes/notes/fix-compiling-issue-in-release-with-vc++-47500a37841cfaa8.yaml b/releasenotes/notes/fix-compiling-issue-in-release-with-vc++-47500a37841cfaa8.yaml new file mode 100644 index 0000000000..f874dd78c0 --- /dev/null +++ 
b/releasenotes/notes/fix-compiling-issue-in-release-with-vc++-47500a37841cfaa8.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes an issue when compiling a release version on windows with vc++, + also unrolls a for, avoiding an unnecessary switch inside it. + The fix is for https://github.com/Qiskit/qiskit-aer/issues/1918 + + diff --git a/src/simulators/statevector/qv_avx2.cpp b/src/simulators/statevector/qv_avx2.cpp index 87b38d8b13..4d92ff8283 100644 --- a/src/simulators/statevector/qv_avx2.cpp +++ b/src/simulators/statevector/qv_avx2.cpp @@ -770,22 +770,12 @@ inline void _apply_matrix_double_avx_q0q1(RealVectorView &reals, for (size_t i = 0; i < (1ULL << num_qubits); i += 4) { auto index = indexes[i]; _mm_load_twoarray_complex(reals[index], imags[index], vreals[i], vimags[i]); - for (size_t j = 1; j < 4; ++j) { - switch (j) { - case 1: - vreals[i + j] = _mm256_permute4x64_pd(vreals[i], PERM_D_Q0Q1_0); - vimags[i + j] = _mm256_permute4x64_pd(vimags[i], PERM_D_Q0Q1_0); - break; - case 2: - vreals[i + j] = _mm256_permute4x64_pd(vreals[i], PERM_D_Q0Q1_1); - vimags[i + j] = _mm256_permute4x64_pd(vimags[i], PERM_D_Q0Q1_1); - break; - case 3: - vreals[i + j] = _mm256_permute4x64_pd(vreals[i], PERM_D_Q0Q1_2); - vimags[i + j] = _mm256_permute4x64_pd(vimags[i], PERM_D_Q0Q1_2); - break; - } - } + vreals[i + 1] = _mm256_permute4x64_pd(vreals[i], PERM_D_Q0Q1_0); + vimags[i + 1] = _mm256_permute4x64_pd(vimags[i], PERM_D_Q0Q1_0); + vreals[i + 2] = _mm256_permute4x64_pd(vreals[i], PERM_D_Q0Q1_1); + vimags[i + 2] = _mm256_permute4x64_pd(vimags[i], PERM_D_Q0Q1_1); + vreals[i + 3] = _mm256_permute4x64_pd(vreals[i], PERM_D_Q0Q1_2); + vimags[i + 3] = _mm256_permute4x64_pd(vimags[i], PERM_D_Q0Q1_2); } size_t mindex = 0; From 39487dbf8cfe002dbf50cbadd923609c933a4a30 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Tue, 12 Sep 2023 15:43:12 +0900 Subject: [PATCH 08/63] Fix required_memory_mb for MPS and extended stabilizer (#1933) * Fix required_memory_mb for MPS and extended stabilizer * 
required_memory_mb is calculated every time, so added Config to some functions calling required_memory_mb --- ...izer_required_memory-f4fb0aebfeeb68e2.yaml | 10 ++ src/controllers/aer_controller.hpp | 93 ++++++------ src/simulators/batch_shots_executor.hpp | 8 +- src/simulators/circuit_executor.hpp | 44 +++--- .../density_matrix/densitymatrix_executor.hpp | 2 +- .../matrix_product_state.hpp | 16 +- .../matrix_product_state_size_estimator.hpp | 138 ++++++++++++++++++ src/simulators/multi_state_executor.hpp | 14 +- src/simulators/parallel_state_executor.hpp | 21 +-- .../statevector/statevector_executor.hpp | 2 +- 10 files changed, 247 insertions(+), 101 deletions(-) create mode 100644 releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml create mode 100644 src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp diff --git a/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml b/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml new file mode 100644 index 0000000000..38026d8e88 --- /dev/null +++ b/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + State::set_config was not called before calling State::required_memory_mb. + Extended stabilizer uses parameter from config to calculate required memory + so size was not correct before this fix. + Now Config is passed to required_memory_mb function. + + State::required_memory_mb for MPS method returned wrong memory size. + This fix adds memory size estimation by calculating max bond dimension. 
diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index c455f5fc20..f42ae64ef8 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -131,7 +131,7 @@ class Controller { // If `throw_except` is true an exception will be thrown on the return false // case listing the invalid instructions in the circuit or noise model, or // the required memory. - bool validate_method(Method method, const Circuit &circ, + bool validate_method(Method method, const Config &config, const Circuit &circ, const Noise::NoiseModel &noise, bool throw_except = false) const; @@ -147,13 +147,14 @@ class Controller { // The noise model will be modified to enable superop or kraus sampling // methods if required by the chosen methods. std::vector - simulation_methods(std::vector> &circuits, + simulation_methods(const Config &config, + std::vector> &circuits, Noise::NoiseModel &noise_model) const; // Return the simulation method to use based on the input circuit // and noise model Method - automatic_simulation_method(const Circuit &circ, + automatic_simulation_method(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise_model) const; bool has_statevector_ops(const Circuit &circuit) const; @@ -165,9 +166,7 @@ class Controller { void clear_parallelization(); // Set parallelization for experiments - void set_parallelization_experiments( - const std::vector> &circuits, - const Noise::NoiseModel &noise, const std::vector &methods); + void set_parallelization_experiments(const reg_t &required_memory_list); void save_exception_to_results(Result &result, const std::exception &e) const; @@ -354,12 +353,12 @@ void Controller::clear_parallelization() { } void Controller::set_parallelization_experiments( - const std::vector> &circuits, - const Noise::NoiseModel &noise, const std::vector &methods) { + const reg_t &required_memory_mb_list) { + if (explicit_parallelization_) return; - if (circuits.size() == 1) { + if 
(required_memory_mb_list.size() == 1) { parallel_experiments_ = 1; return; } @@ -378,20 +377,12 @@ void Controller::set_parallelization_experiments( } // If memory allows, execute experiments in parallel - std::vector required_memory_mb_list(circuits.size()); - for (size_t j = 0; j < circuits.size(); j++) { - std::shared_ptr executor = - make_circuit_executor(methods[j]); - required_memory_mb_list[j] = - executor->required_memory_mb(*circuits[j], noise); - executor.reset(); - } - std::sort(required_memory_mb_list.begin(), required_memory_mb_list.end(), - std::greater<>()); + reg_t required_sorted = required_memory_mb_list; + std::sort(required_sorted.begin(), required_sorted.end(), std::greater<>()); size_t total_memory = 0; int parallel_experiments = 0; - for (size_t required_memory_mb : required_memory_mb_list) { + for (size_t required_memory_mb : required_sorted) { total_memory += required_memory_mb; if (total_memory > max_memory_mb_) break; @@ -401,9 +392,9 @@ void Controller::set_parallelization_experiments( if (parallel_experiments <= 0) throw std::runtime_error( "a circuit requires more memory than max_memory_mb."); - parallel_experiments_ = - std::min({parallel_experiments, max_experiments, - max_parallel_threads_, static_cast(circuits.size())}); + parallel_experiments_ = std::min( + {parallel_experiments, max_experiments, max_parallel_threads_, + static_cast(required_memory_mb_list.size())}); } size_t Controller::get_system_memory_mb() { @@ -508,10 +499,14 @@ Result Controller::execute(std::vector> &circuits, #endif // Determine simulation method for each circuit // and enable required noise sampling methods - auto methods = simulation_methods(circuits, noise_model); + auto methods = simulation_methods(config, circuits, noise_model); // Initialize Result object for the given number of experiments Result result(circuits.size()); + // Initialize circuit executors for each circuit + std::vector> executors( + circuits.size()); + reg_t 
required_memory_mb_list(circuits.size()); // Execute each circuit in a try block try { @@ -519,9 +514,14 @@ Result Controller::execute(std::vector> &circuits, // set parallelization for experiments try { - // catch exception raised by required_memory_mb because of invalid - // simulation method - set_parallelization_experiments(circuits, noise_model, methods); + for (int i = 0; i < circuits.size(); i++) { + executors[i] = make_circuit_executor(methods[i]); + required_memory_mb_list[i] = + executors[i]->required_memory_mb(config, *circuits[i], noise_model); + result.results[i].metadata.add(required_memory_mb_list[i], + "required_memory_mb"); + } + set_parallelization_experiments(required_memory_mb_list); } catch (std::exception &e) { save_exception_to_results(result, e); } @@ -581,23 +581,18 @@ Result Controller::execute(std::vector> &circuits, // nested loops that causes performance degradation (DO NOT use if statement // in #pragma omp) if (parallel_experiments_ == 1) { - for (int j = 0; j < NUM_RESULTS; ++j) { - std::shared_ptr executor = - make_circuit_executor(methods[j]); - executor->run_circuit(*circuits[j], noise_model, config, methods[j], - sim_device_, result.results[j]); - executor.reset(); + for (int i = 0; i < NUM_RESULTS; i++) { + executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], + sim_device_, result.results[i]); } } else { #pragma omp parallel for num_threads(parallel_experiments_) - for (int j = 0; j < NUM_RESULTS; ++j) { - std::shared_ptr executor = - make_circuit_executor(methods[j]); - executor->run_circuit(*circuits[j], noise_model, config, methods[j], - sim_device_, result.results[j]); - executor.reset(); + for (int i = 0; i < NUM_RESULTS; i++) { + executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], + sim_device_, result.results[i]); } } + executors.clear(); // Check each experiment result for completed status. // If only some experiments completed return partial completed status. 
@@ -755,7 +750,8 @@ Controller::make_circuit_executor(const Method method) const { } std::vector -Controller::simulation_methods(std::vector> &circuits, +Controller::simulation_methods(const Config &config, + std::vector> &circuits, Noise::NoiseModel &noise_model) const { // Does noise model contain kraus noise bool kraus_noise = @@ -769,7 +765,7 @@ Controller::simulation_methods(std::vector> &circuits, bool kraus_enabled = false; for (const auto &_circ : circuits) { const auto circ = *_circ; - auto method = automatic_simulation_method(circ, noise_model); + auto method = automatic_simulation_method(config, circ, noise_model); sim_methods.push_back(method); if (!superop_enabled && (method == Method::density_matrix || method == Method::superop || @@ -811,9 +807,10 @@ Controller::simulation_methods(std::vector> &circuits, } Method Controller::automatic_simulation_method( - const Circuit &circ, const Noise::NoiseModel &noise_model) const { + const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise_model) const { // If circuit and noise model are Clifford run on Stabilizer simulator - if (validate_method(Method::stabilizer, circ, noise_model, false)) { + if (validate_method(Method::stabilizer, config, circ, noise_model, false)) { return Method::stabilizer; } // For noisy simulations we enable the density matrix method if @@ -823,7 +820,8 @@ Method Controller::automatic_simulation_method( // dimension if (noise_model.has_quantum_errors() && circ.num_qubits < 64 && circ.shots > (1ULL << circ.num_qubits) && - validate_method(Method::density_matrix, circ, noise_model, false) && + validate_method(Method::density_matrix, config, circ, noise_model, + false) && circ.can_sample) { return Method::density_matrix; } @@ -837,7 +835,7 @@ Method Controller::automatic_simulation_method( {Method::statevector, Method::density_matrix, Method::matrix_product_state, Method::unitary, Method::superop}); for (const auto &method : methods) { - if (validate_method(method, 
circ, noise_model, false)) + if (validate_method(method, config, circ, noise_model, false)) return method; } @@ -867,12 +865,13 @@ bool Controller::has_statevector_ops(const Circuit &circ) const { //------------------------------------------------------------------------- // Validation //------------------------------------------------------------------------- -bool Controller::validate_method(Method method, const Circuit &circ, +bool Controller::validate_method(Method method, const Config &config, + const Circuit &circ, const Noise::NoiseModel &noise_model, bool throw_except) const { std::shared_ptr executor = make_circuit_executor(method); - bool ret = executor->validate_state(circ, noise_model, throw_except); + bool ret = executor->validate_state(config, circ, noise_model, throw_except); executor.reset(); return ret; } diff --git a/src/simulators/batch_shots_executor.hpp b/src/simulators/batch_shots_executor.hpp index eef2f85751..bc991b2a0c 100644 --- a/src/simulators/batch_shots_executor.hpp +++ b/src/simulators/batch_shots_executor.hpp @@ -51,7 +51,7 @@ class BatchShotsExecutor : public virtual MultiStateExecutor { protected: void set_config(const Config &config) override; - void set_parallelization(const Circuit &circ, + void set_parallelization(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) override; void run_circuit_shots(Circuit &circ, const Noise::NoiseModel &noise, @@ -104,8 +104,8 @@ void BatchShotsExecutor::set_config(const Config &config) { template void BatchShotsExecutor::set_parallelization( - const Circuit &circ, const Noise::NoiseModel &noise) { - Base::set_parallelization(circ, noise); + const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) { + Base::set_parallelization(config, circ, noise); enable_batch_multi_shots_ = false; if (batched_shots_gpu_ && Base::sim_device_ != Device::CPU) { @@ -152,7 +152,7 @@ void BatchShotsExecutor::run_circuit_shots( } Base::set_distribution(circ.shots); - 
Base::num_max_shots_ = Base::get_max_parallel_shots(circ, noise); + Base::num_max_shots_ = Base::get_max_parallel_shots(config, circ, noise); if (Base::num_max_shots_ == 0) Base::num_max_shots_ = 1; diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp index 425bb7d097..dbf880bdf4 100644 --- a/src/simulators/circuit_executor.hpp +++ b/src/simulators/circuit_executor.hpp @@ -53,11 +53,12 @@ class Base { const Device device, ExperimentResult &result) = 0; // Return an estimate of the required memory for a circuit. - virtual size_t required_memory_mb(const Circuit &circuit, + virtual size_t required_memory_mb(const Config &config, + const Circuit &circuit, const Noise::NoiseModel &noise) const = 0; virtual size_t max_memory_mb(void) = 0; - virtual bool validate_state(const Circuit &circ, + virtual bool validate_state(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise, bool throw_except) const = 0; }; @@ -132,14 +133,17 @@ class Executor : public Base { const Device device, ExperimentResult &result) override; // Return an estimate of the required memory for a circuit. 
- size_t required_memory_mb(const Circuit &circuit, + size_t required_memory_mb(const Config &config, const Circuit &circuit, const Noise::NoiseModel &noise) const override { state_t tmp; - return tmp.required_memory_mb(circuit.num_qubits, circuit.ops); + tmp.set_config(config); + uint_t ret = tmp.required_memory_mb(circuit.num_qubits, circuit.ops); + return ret; } size_t max_memory_mb(void) override { return max_memory_mb_; } - bool validate_state(const Circuit &circ, const Noise::NoiseModel &noise, + bool validate_state(const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise, bool throw_except) const override; protected: @@ -164,7 +168,7 @@ class Executor : public Base { } // get max shots stored on memory - uint_t get_max_parallel_shots(const Circuit &circuit, + uint_t get_max_parallel_shots(const Config &config, const Circuit &circuit, const Noise::NoiseModel &noise) const; bool multiple_shots_required(const Circuit &circuit, @@ -178,7 +182,7 @@ class Executor : public Base { bool has_statevector_ops(const Circuit &circ) const; virtual void set_config(const Config &config); - virtual void set_parallelization(const Circuit &circ, + virtual void set_parallelization(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise); virtual void run_circuit_with_sampling(Circuit &circ, const Config &config, @@ -376,8 +380,9 @@ bool Executor::multiple_shots_required( template uint_t Executor::get_max_parallel_shots( - const Circuit &circ, const Noise::NoiseModel &noise) const { - uint_t mem = required_memory_mb(circ, noise); + const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise) const { + uint_t mem = required_memory_mb(config, circ, noise); if (mem == 0) return circ.shots; @@ -389,7 +394,8 @@ uint_t Executor::get_max_parallel_shots( } template -void Executor::set_parallelization(const Circuit &circ, +void Executor::set_parallelization(const Config &config, + const Circuit &circ, const Noise::NoiseModel &noise) { 
// MPI setting myrank_ = 0; @@ -489,7 +495,7 @@ void Executor::set_parallelization(const Circuit &circ, // Limit parallel shots by available memory and number of shots // And assign the remaining threads to state update int circ_memory_mb = - required_memory_mb(circ, noise) / num_process_per_experiment_; + required_memory_mb(config, circ, noise) / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_gpu_memory_mb_ : max_memory_mb_; if (mem_size < circ_memory_mb) @@ -525,7 +531,7 @@ void Executor::run_circuit(Circuit &circ, sim_device_ = device; set_config(config); - set_parallelization(circ, noise); + set_parallelization(config, circ, noise); // Rng engine (this one is used to add noise on circuit) RngEngine rng; @@ -547,6 +553,9 @@ void Executor::run_circuit(Circuit &circ, result.metadata.add(circ.qubits(), "active_input_qubits"); result.metadata.add(circ.qubit_map(), "input_qubit_map"); result.metadata.add(circ.remapped_qubits, "remapped_qubits"); + result.metadata.add(max_memory_mb_, "max_memory_mb"); + if (sim_device_ == Device::GPU) + result.metadata.add(max_gpu_memory_mb_, "max_gpu_memory_mb"); // Add measure sampling to metadata // Note: this will set to `true` if sampling is enabled for the circuit @@ -555,7 +564,7 @@ void Executor::run_circuit(Circuit &circ, // Validate gateset and memory requirements, raise exception if they're // exceeded - validate_state(circ, noise, true); + validate_state(config, circ, noise, true); has_statevector_ops_ = has_statevector_ops(circ); @@ -696,7 +705,7 @@ void Executor::run_circuit_shots( RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { // insert runtime noise sample ops here - int_t par_shots = (int_t)get_max_parallel_shots(circ, noise); + int_t par_shots = (int_t)get_max_parallel_shots(config, circ, noise); par_shots = std::min((int_t)parallel_shots_, par_shots); std::vector par_results(par_shots); @@ -914,7 +923,8 @@ void Executor::measure_sampler(InputIterator 
first_meas, } template -bool Executor::validate_state(const Circuit &circ, +bool Executor::validate_state(const Config &config, + const Circuit &circ, const Noise::NoiseModel &noise, bool throw_except) const { std::stringstream error_msg; @@ -942,8 +952,8 @@ bool Executor::validate_state(const Circuit &circ, // Validate memory requirements bool memory_valid = true; if (max_memory_mb_ > 0) { - size_t required_mb = state.required_memory_mb(circ.num_qubits, circ.ops) / - num_process_per_experiment_; + size_t required_mb = + required_memory_mb(config, circ, noise) / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_memory_mb_ + max_gpu_memory_mb_ : max_memory_mb_; diff --git a/src/simulators/density_matrix/densitymatrix_executor.hpp b/src/simulators/density_matrix/densitymatrix_executor.hpp index d656a6f9a0..6c7d28e923 100644 --- a/src/simulators/density_matrix/densitymatrix_executor.hpp +++ b/src/simulators/density_matrix/densitymatrix_executor.hpp @@ -306,7 +306,7 @@ void Executor::run_circuit_shots( Circuit &circ, const Noise::NoiseModel &noise, const Config &config, RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { state_t dummy_state; - if (BasePar::multiple_chunk_required(circ, noise)) { + if (BasePar::multiple_chunk_required(config, circ, noise)) { return BasePar::run_circuit_shots(circ, noise, config, init_rng, result, sample_noise); } else { diff --git a/src/simulators/matrix_product_state/matrix_product_state.hpp b/src/simulators/matrix_product_state/matrix_product_state.hpp index 4105fd3bdf..1c29c9bd02 100644 --- a/src/simulators/matrix_product_state/matrix_product_state.hpp +++ b/src/simulators/matrix_product_state/matrix_product_state.hpp @@ -38,6 +38,8 @@ #include "matrix_product_state_internal.hpp" #include "simulators/state.hpp" +#include "matrix_product_state_size_estimator.hpp" + namespace AER { namespace MatrixProductState { @@ -320,14 +322,12 @@ void State::initialize_omp() { size_t 
State::required_memory_mb(uint_t num_qubits, const std::vector &ops) const { - // for each qubit we have a tensor structure. - // Initially, each tensor contains 2 matrices with a single complex double - // Depending on the number of 2-qubit gates, - // these matrices may double their size - // for now - compute only initial size - // later - FIXME - size_t mem_mb = 16 * 2 * num_qubits; - return mem_mb; + if (num_qubits > 1) { + MPSSizeEstimator est(num_qubits); + uint_t size = est.estimate(ops); + return (size >> 20); + } + return 0; } void State::set_config(const Config &config) { diff --git a/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp new file mode 100644 index 0000000000..600b29207d --- /dev/null +++ b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp @@ -0,0 +1,138 @@ +/** + * This code is part of Qiskit. + * + * (C) Copyright IBM 2018, 2019. + * + * This code is licensed under the Apache License, Version 2.0. You may + * obtain a copy of this license in the LICENSE.txt file in the root directory + * of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. + * + * Any modifications or derivative works of this code must retain this + * copyright notice, and modified files need to carry a notice indicating + * that they have been altered from the originals. 
+ */ + +#ifndef _mps_size_estimator_hpp_ +#define _mps_size_estimator_hpp_ + +#include "framework/operations.hpp" +#include "framework/utils.hpp" + +namespace AER { +namespace MatrixProductState { + +// size estimation of MPS simulation by calculating bond dimensions +class MPSSizeEstimator { +protected: + uint_t num_qubits_; + reg_t bond_dimensions_; + std::vector> tensor_size_; + reg_t qubit_map_; + reg_t qubit_order_; + +public: + MPSSizeEstimator(void) {} + MPSSizeEstimator(uint_t nq) { initialize(nq); } + + void initialize(uint_t nq); + + uint_t estimate(const std::vector &ops); + +protected: + void apply_qubits(const reg_t &qubits); + + void reorder_qubit(uint_t qubit, uint_t target); + + void update(uint_t a); +}; + +void MPSSizeEstimator::initialize(uint_t nq) { + num_qubits_ = nq; + bond_dimensions_.resize(nq); + tensor_size_.resize(nq); + qubit_map_.resize(nq); + qubit_order_.resize(nq); + + for (int_t i = 0; i < nq; i++) { + tensor_size_[i].first = 1; + tensor_size_[i].second = 1; + + qubit_map_[i] = i; + qubit_order_[i] = i; + + bond_dimensions_[i] = 1; + } +} + +uint_t MPSSizeEstimator::estimate(const std::vector &ops) { + uint_t n = ops.size(); + for (int_t i = 0; i < n; i++) { + switch (ops[i].type) { + case Operations::OpType::gate: + case Operations::OpType::matrix: + case Operations::OpType::diagonal_matrix: + if (ops[i].qubits.size() > 1) + apply_qubits(ops[i].qubits); + break; + default: + break; + } + } + uint_t max_bond = 0; + for (int_t i = 0; i < num_qubits_ - 1; i++) { + if (max_bond < bond_dimensions_[i]) + max_bond = bond_dimensions_[i]; + } + return num_qubits_ * (32 * max_bond * max_bond + 8 * max_bond); +} + +void MPSSizeEstimator::apply_qubits(const reg_t &qubits) { + reg_t sorted(qubits.size()); + + for (int_t i = 0; i < qubits.size(); i++) { + sorted[i] = qubit_map_[qubits[i]]; + } + std::sort(sorted.begin(), sorted.end()); + + for (int_t i = 1; i < qubits.size(); i++) { + reorder_qubit(sorted[i - 1], sorted[i]); + } + + for (int_t 
i = 0; i < qubits.size() - 1; i++) { + update(sorted[i]); + } +} + +void MPSSizeEstimator::reorder_qubit(uint_t qubit, uint_t target) { + while (target > qubit + 1) { + uint_t q0, q1; + q0 = qubit_order_[target - 1]; + q1 = qubit_order_[target]; + qubit_map_[q0] = target; + qubit_map_[q1] = target - 1; + std::swap(qubit_order_[target], qubit_order_[target - 1]); + + update(target - 1); + + target--; + } +} + +void MPSSizeEstimator::update(uint_t a) { + uint_t rows = tensor_size_[a].first; + uint_t cols = tensor_size_[a + 1].second; + + bond_dimensions_[a] = std::min(rows * 2, cols * 2); + + tensor_size_[a].first = rows; + tensor_size_[a].second = bond_dimensions_[a]; + tensor_size_[a + 1].first = bond_dimensions_[a]; + tensor_size_[a + 1].second = cols; +} + +//------------------------------------------------------------------------- +} // namespace MatrixProductState +//------------------------------------------------------------------------- +} // end namespace AER +//------------------------------------------------------------------------- +#endif diff --git a/src/simulators/multi_state_executor.hpp b/src/simulators/multi_state_executor.hpp index 2d0da87e4a..570464ec03 100644 --- a/src/simulators/multi_state_executor.hpp +++ b/src/simulators/multi_state_executor.hpp @@ -95,12 +95,6 @@ class MultiStateExecutor : public Executor { MultiStateExecutor(); virtual ~MultiStateExecutor(); - size_t required_memory_mb(const Circuit &circuit, - const Noise::NoiseModel &noise) const override { - state_t tmp; - return tmp.required_memory_mb(circuit.num_qubits, circuit.ops); - } - uint_t get_process_by_chunk(uint_t cid); protected: @@ -133,7 +127,7 @@ class MultiStateExecutor : public Executor { virtual void apply_global_phase() {} void set_global_phase(double theta); - void set_parallelization(const Circuit &circ, + void set_parallelization(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) override; virtual bool shot_branching_supported(void) { @@ 
-219,8 +213,8 @@ void MultiStateExecutor::set_distribution(uint_t num_states) { template void MultiStateExecutor::set_parallelization( - const Circuit &circ, const Noise::NoiseModel &noise) { - Base::set_parallelization(circ, noise); + const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) { + Base::set_parallelization(config, circ, noise); } template @@ -266,7 +260,7 @@ void MultiStateExecutor::run_circuit_shots( } set_distribution(circ.shots); - num_max_shots_ = Base::get_max_parallel_shots(circ, noise); + num_max_shots_ = Base::get_max_parallel_shots(config, circ, noise); bool shot_branching = false; if (shot_branching_enable_ && num_local_states_ > 1 && diff --git a/src/simulators/parallel_state_executor.hpp b/src/simulators/parallel_state_executor.hpp index b40ac2556f..1dbb0983fd 100644 --- a/src/simulators/parallel_state_executor.hpp +++ b/src/simulators/parallel_state_executor.hpp @@ -59,12 +59,6 @@ class ParallelStateExecutor : public virtual MultiStateExecutor { ParallelStateExecutor(); virtual ~ParallelStateExecutor(); - size_t required_memory_mb(const Circuit &circuit, - const Noise::NoiseModel &noise) const override { - state_t tmp; - return tmp.required_memory_mb(circuit.num_qubits, circuit.ops); - } - uint_t get_process_by_chunk(uint_t cid); protected: @@ -72,7 +66,7 @@ class ParallelStateExecutor : public virtual MultiStateExecutor { virtual uint_t qubit_scale(void) { return 1; } - bool multiple_chunk_required(const Circuit &circuit, + bool multiple_chunk_required(const Config &config, const Circuit &circuit, const Noise::NoiseModel &noise) const; // Return cache blocking transpiler pass @@ -222,7 +216,8 @@ void ParallelStateExecutor::set_config(const Config &config) { template bool ParallelStateExecutor::multiple_chunk_required( - const Circuit &circ, const Noise::NoiseModel &noise) const { + const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise) const { if (circ.num_qubits < 3) return false; if 
(cache_block_qubit_ >= 2 && cache_block_qubit_ < circ.num_qubits) @@ -231,14 +226,14 @@ bool ParallelStateExecutor::multiple_chunk_required( if (Base::num_process_per_experiment_ == 1 && Base::sim_device_ == Device::GPU && Base::num_gpus_ > 0) { return (Base::max_gpu_memory_mb_ / Base::num_gpus_ < - Base::required_memory_mb(circ, noise)); + Base::required_memory_mb(config, circ, noise)); } if (Base::num_process_per_experiment_ > 1) { size_t total_mem = Base::max_memory_mb_; if (Base::sim_device_ == Device::GPU) total_mem += Base::max_gpu_memory_mb_; if (total_mem * Base::num_process_per_experiment_ > - Base::required_memory_mb(circ, noise)) + Base::required_memory_mb(config, circ, noise)) return true; } @@ -263,7 +258,7 @@ ParallelStateExecutor::transpile_cache_blocking( if (!cache_block_pass.enabled()) { // if blocking is not set by config, automatically set if required - if (multiple_chunk_required(circ, noise)) { + if (multiple_chunk_required(config, circ, noise)) { int nplace = Base::num_process_per_experiment_; if (Base::sim_device_ == Device::GPU && Base::num_gpus_ > 0) nplace *= Base::num_gpus_; @@ -424,7 +419,7 @@ void ParallelStateExecutor::run_circuit_with_sampling( state_t dummy_state; bool cache_block = false; - if (multiple_chunk_required(circ, dummy_noise)) { + if (multiple_chunk_required(config, circ, dummy_noise)) { auto fusion_pass = Base::transpile_fusion(circ.opset(), config); fusion_pass.optimize_circuit(circ, dummy_noise, dummy_state.opset(), result); @@ -483,7 +478,7 @@ void ParallelStateExecutor::run_circuit_shots( Circuit &circ, const Noise::NoiseModel &noise, const Config &config, RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { - if (!multiple_chunk_required(circ, noise)) { + if (!multiple_chunk_required(config, circ, noise)) { return Base::run_circuit_shots(circ, noise, config, init_rng, result, sample_noise); } diff --git a/src/simulators/statevector/statevector_executor.hpp 
b/src/simulators/statevector/statevector_executor.hpp index 28312f4aae..6c2071bcea 100644 --- a/src/simulators/statevector/statevector_executor.hpp +++ b/src/simulators/statevector/statevector_executor.hpp @@ -230,7 +230,7 @@ void Executor::run_circuit_shots( Circuit &circ, const Noise::NoiseModel &noise, const Config &config, RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { state_t dummy_state; - if (BasePar::multiple_chunk_required(circ, noise)) { + if (BasePar::multiple_chunk_required(config, circ, noise)) { return BasePar::run_circuit_shots(circ, noise, config, init_rng, result, sample_noise); } else { From 343b1196937a52545584d2659e3093370b110306 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Tue, 19 Sep 2023 14:51:36 +0900 Subject: [PATCH 09/63] Add rotation gates to stabilizer (#1938) * Add rotation gates to stabilizer * format tests * set method=statevctor to sample tests * set method=statevector to Estimator test, fix ry gate * format test --- qiskit_aer/backends/backend_utils.py | 3 + .../stabilizer_rotation-8ce2effd9578ee0a.yaml | 9 + src/simulators/circuit_executor.hpp | 11 +- .../stabilizer/stabilizer_state.hpp | 82 +++++- src/simulators/state.hpp | 6 + .../backends/aer_simulator/test_rotation.py | 76 ++++++ test/terra/primitives/test_estimator.py | 12 +- test/terra/primitives/test_sampler.py | 4 +- test/terra/reference/ref_rotation.py | 256 ++++++++++++++++++ 9 files changed, 448 insertions(+), 11 deletions(-) create mode 100644 releasenotes/notes/stabilizer_rotation-8ce2effd9578ee0a.yaml create mode 100644 test/terra/backends/aer_simulator/test_rotation.py create mode 100644 test/terra/reference/ref_rotation.py diff --git a/qiskit_aer/backends/backend_utils.py b/qiskit_aer/backends/backend_utils.py index 717f0a4b59..8495ac8869 100644 --- a/qiskit_aer/backends/backend_utils.py +++ b/qiskit_aer/backends/backend_utils.py @@ -211,6 +211,9 @@ "delay", "pauli", "ecr", + "rx", + "ry", + "rz", ] ), "extended_stabilizer": sorted( diff --git 
a/releasenotes/notes/stabilizer_rotation-8ce2effd9578ee0a.yaml b/releasenotes/notes/stabilizer_rotation-8ce2effd9578ee0a.yaml new file mode 100644 index 0000000000..2202d0c817 --- /dev/null +++ b/releasenotes/notes/stabilizer_rotation-8ce2effd9578ee0a.yaml @@ -0,0 +1,9 @@ +--- +upgrade: + - | + Added support for rotation gates (rx, ry and rz gates) to the stabilizer method + when the input theta is a multiple of pi/2. + If ``method=automatic`` is specified (this is the default) and all the input + theta of rotation gates are multiples of pi/2, ``method=stabilizer`` + is selected. Or, when the user sets ``method=stabilizer`` and any theta + is not a multiple of pi/2, Aer raises an exception. diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp index dbf880bdf4..73a2d502e8 100644 --- a/src/simulators/circuit_executor.hpp +++ b/src/simulators/circuit_executor.hpp @@ -933,6 +933,7 @@ bool Executor::validate_state(const Config &config, JSON::get_value(circ_name, "name", circ.header); + state.set_config(config); // Check if a circuit is valid for state ops bool circ_valid = state.opset().contains(circ.opset()); if (throw_except && !circ_valid) { @@ -940,6 +941,12 @@ bool Executor::validate_state(const Config &config, error_msg << state.opset().difference(circ.opset()); error_msg << " for \"" << state.name() << "\" method."; } + // check parameters set inf ops + circ_valid &= state.validate_parameters(circ.ops); + if (throw_except && !circ_valid) { + error_msg << "Circuit " << circ_name << " contains invalid parameters "; + error_msg << " for \"" << state.name() << "\" method."; + } // Check if a noise model valid for state ops bool noise_valid = noise.is_ideal() || state.opset().contains(noise.opset()); @@ -952,8 +959,8 @@ bool Executor::validate_state(const Config &config, // Validate memory requirements bool memory_valid = true; if (max_memory_mb_ > 0) { - size_t required_mb = - required_memory_mb(config, circ, noise) / num_process_per_experiment_; + 
size_t required_mb = state.required_memory_mb(circ.num_qubits, circ.ops) / + num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_memory_mb_ + max_gpu_memory_mb_ : max_memory_mb_; diff --git a/src/simulators/stabilizer/stabilizer_state.hpp b/src/simulators/stabilizer/stabilizer_state.hpp index 81ed2b9924..26ab0f418e 100644 --- a/src/simulators/stabilizer/stabilizer_state.hpp +++ b/src/simulators/stabilizer/stabilizer_state.hpp @@ -38,8 +38,8 @@ const Operations::OpSet StateOpSet( OpType::save_amps_sq, OpType::save_stabilizer, OpType::save_clifford, OpType::save_state, OpType::set_stabilizer, OpType::jump, OpType::mark}, // Gates - {"CX", "cx", "cy", "cz", "swap", "id", "x", "y", "z", "h", "s", "sdg", "sx", - "sxdg", "delay", "pauli", "ecr"}); + {"CX", "cx", "cy", "cz", "swap", "id", "x", "y", "z", "h", + "s", "sdg", "sx", "sxdg", "delay", "pauli", "ecr", "rx", "ry", "rz"}); enum class Gates { id, @@ -56,7 +56,10 @@ enum class Gates { cz, swap, pauli, - ecr + ecr, + rx, + ry, + rz }; //============================================================================ @@ -101,6 +104,9 @@ class State : public QuantumState::State { virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) override; + bool + validate_parameters(const std::vector &ops) const override; + protected: //----------------------------------------------------------------------- // Apply instructions @@ -203,7 +209,10 @@ const stringmap_t State::gateset_({ {"cz", Gates::cz}, // Controlled-Z gate {"swap", Gates::swap}, // SWAP gate {"pauli", Gates::pauli}, // Pauli gate - {"ecr", Gates::ecr} // ECR gate + {"ecr", Gates::ecr}, // ECR gate + {"rx", Gates::rx}, // RX gate (only support k * pi/2 cases) + {"ry", Gates::ry}, // RY gate (only support k * pi/2 cases) + {"rz", Gates::rz} // RZ gate (only support k * pi/2 cases) }); //============================================================================ @@ -245,6 +254,23 @@ void 
State::set_config(const Config &config) { max_qubits_snapshot_probs_ = std::max(max_qubits_snapshot_probs_, 64); } +bool State::validate_parameters(const std::vector &ops) const { + for (int_t i = 0; i < ops.size(); i++) { + if (ops[i].type == OpType::gate) { + // check parameter of R gates + if (ops[i].name == "rx" || ops[i].name == "ry" || ops[i].name == "rz") { + double pi2 = std::real(ops[i].params[0]) * 2.0 / M_PI; + double pi2_int = (double)std::round(pi2); + + if (!AER::Linalg::almost_equal(pi2, pi2_int)) { + return false; + } + } + } + } + return true; +} + //========================================================================= // Implementation: apply operations //========================================================================= @@ -298,6 +324,7 @@ void State::apply_op(const Operations::Op &op, ExperimentResult &result, } void State::apply_gate(const Operations::Op &op) { + int_t pi2; // Check Op is supported by State auto it = gateset_.find(op.name); if (it == gateset_.end()) @@ -369,6 +396,53 @@ void State::apply_gate(const Operations::Op &op) { BaseState::qreg_.append_x(op.qubits[0]); BaseState::qreg_.append_x(op.qubits[1]); break; + case Gates::rx: + pi2 = (int_t)std::round(std::real(op.params[0]) * 2.0 / M_PI) & 3; + if (pi2 == 1) { + // HSH + BaseState::qreg_.append_h(op.qubits[0]); + BaseState::qreg_.append_s(op.qubits[0]); + BaseState::qreg_.append_h(op.qubits[0]); + } else if (pi2 == 2) { + // X + BaseState::qreg_.append_x(op.qubits[0]); + } else if (pi2 == 3) { + // HSdgH + BaseState::qreg_.append_h(op.qubits[0]); + BaseState::qreg_.append_z(op.qubits[0]); + BaseState::qreg_.append_s(op.qubits[0]); + BaseState::qreg_.append_h(op.qubits[0]); + } + break; + case Gates::ry: + pi2 = (int_t)std::round(std::real(op.params[0]) * 2.0 / M_PI) & 3; + if (pi2 == 1) { + // HX + BaseState::qreg_.append_x(op.qubits[0]); + BaseState::qreg_.append_h(op.qubits[0]); + } else if (pi2 == 2) { + // Y + BaseState::qreg_.append_y(op.qubits[0]); + } else if 
(pi2 == 3) { + // Hdg + BaseState::qreg_.append_h(op.qubits[0]); + BaseState::qreg_.append_x(op.qubits[0]); + } + break; + case Gates::rz: + pi2 = (int_t)std::round(std::real(op.params[0]) * 2.0 / M_PI) & 3; + if (pi2 == 1) { + // S + BaseState::qreg_.append_s(op.qubits[0]); + } else if (pi2 == 2) { + // Z + BaseState::qreg_.append_z(op.qubits[0]); + } else if (pi2 == 3) { + // Sdg + BaseState::qreg_.append_z(op.qubits[0]); + BaseState::qreg_.append_s(op.qubits[0]); + } + break; default: // We shouldn't reach here unless there is a bug in gateset throw std::invalid_argument( diff --git a/src/simulators/state.hpp b/src/simulators/state.hpp index c8aebfef79..6209e1075d 100644 --- a/src/simulators/state.hpp +++ b/src/simulators/state.hpp @@ -115,6 +115,12 @@ class Base { // Typically this is the n-qubit all |0> state virtual void initialize_qreg(uint_t num_qubits) = 0; + // validate parameters in input operations + virtual bool + validate_parameters(const std::vector &ops) const { + return true; + } + //----------------------------------------------------------------------- // ClassicalRegister methods //----------------------------------------------------------------------- diff --git a/test/terra/backends/aer_simulator/test_rotation.py b/test/terra/backends/aer_simulator/test_rotation.py new file mode 100644 index 0000000000..9e9c2982ef --- /dev/null +++ b/test/terra/backends/aer_simulator/test_rotation.py @@ -0,0 +1,76 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2018, 2019. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. 
+""" +AerSimulator Integration Tests +""" +from ddt import ddt +from test.terra.reference import ref_rotation +from qiskit import transpile +from test.terra.backends.simulator_test_case import SimulatorTestCase, supported_methods + +SUPPORTED_METHODS = [ + "automatic", + "stabilizer", + "statevector", + "density_matrix", + "matrix_product_state", + "tensor_network", +] + + +@ddt +class TestRotation(SimulatorTestCase): + """AerSimulator Rotation gate tests""" + + SEED = 12345 + + # --------------------------------------------------------------------- + # Test rx-gate + # --------------------------------------------------------------------- + @supported_methods(SUPPORTED_METHODS) + def test_rx_gate_deterministic(self, method, device): + """Test rx-gate circuits""" + backend = self.backend(method=method, device=device, seed_simulator=self.SEED) + shots = 1000 + circuits = ref_rotation.rx_gate_circuits_deterministic(final_measure=True) + targets = ref_rotation.rx_gate_counts_deterministic(shots) + result = backend.run(circuits, shots=shots).result() + self.assertSuccess(result) + self.compare_counts(result, circuits, targets, delta=0.05 * shots) + + # --------------------------------------------------------------------- + # Test rz-gate + # --------------------------------------------------------------------- + @supported_methods(SUPPORTED_METHODS) + def test_rz_gate_deterministic(self, method, device): + """Test rz-gate circuits""" + backend = self.backend(method=method, device=device, seed_simulator=self.SEED) + shots = 1000 + circuits = ref_rotation.rz_gate_circuits_deterministic(final_measure=True) + targets = ref_rotation.rz_gate_counts_deterministic(shots) + result = backend.run(circuits, shots=shots).result() + self.assertSuccess(result) + self.compare_counts(result, circuits, targets, delta=0.05 * shots) + + # --------------------------------------------------------------------- + # Test ry-gate + # 
--------------------------------------------------------------------- + @supported_methods(SUPPORTED_METHODS) + def test_ry_gate_deterministic(self, method, device): + """Test ry-gate circuits""" + backend = self.backend(method=method, device=device, seed_simulator=self.SEED) + shots = 1000 + circuits = ref_rotation.ry_gate_circuits_deterministic(final_measure=True) + targets = ref_rotation.ry_gate_counts_deterministic(shots) + result = backend.run(circuits, shots=shots).result() + self.assertSuccess(result) + self.compare_counts(result, circuits, targets, delta=0.05 * shots) diff --git a/test/terra/primitives/test_estimator.py b/test/terra/primitives/test_estimator.py index b14a2840e0..bcbea676fc 100644 --- a/test/terra/primitives/test_estimator.py +++ b/test/terra/primitives/test_estimator.py @@ -57,7 +57,9 @@ def test_estimator(self, abelian_grouping): with self.subTest("PauliSumOp"): observable = PauliSumOp.from_list(lst) ansatz = RealAmplitudes(num_qubits=2, reps=2) - est = Estimator(abelian_grouping=abelian_grouping) + est = Estimator( + backend_options={"method": "statevector"}, abelian_grouping=abelian_grouping + ) result = est.run( ansatz, observable, parameter_values=[[0, 1, 1, 2, 3, 5]], seed=15 ).result() @@ -67,7 +69,9 @@ def test_estimator(self, abelian_grouping): with self.subTest("SparsePauliOp"): observable = SparsePauliOp.from_list(lst) ansatz = RealAmplitudes(num_qubits=2, reps=2) - est = Estimator(abelian_grouping=abelian_grouping) + est = Estimator( + backend_options={"method": "statevector"}, abelian_grouping=abelian_grouping + ) result = est.run( ansatz, observable, parameter_values=[[0, 1, 1, 2, 3, 5]], seed=15 ).result() @@ -84,7 +88,9 @@ def test_estimator(self, abelian_grouping): ] ) ansatz = RealAmplitudes(num_qubits=2, reps=2) - est = Estimator(abelian_grouping=abelian_grouping) + est = Estimator( + backend_options={"method": "statevector"}, abelian_grouping=abelian_grouping + ) result = est.run(ansatz, observable, parameter_values=[[0] 
* 6], seed=15).result() self.assertIsInstance(result, EstimatorResult) np.testing.assert_allclose(result.values, [-0.4], rtol=0.02) diff --git a/test/terra/primitives/test_sampler.py b/test/terra/primitives/test_sampler.py index e19b7f8fd8..4cd6ba3b7f 100644 --- a/test/terra/primitives/test_sampler.py +++ b/test/terra/primitives/test_sampler.py @@ -110,7 +110,7 @@ def test_sampler_param_order(self): qc.measure(1, 1) qc.measure(2, 2) - sampler = Sampler(backend_options={"seed_simulator": 15}) + sampler = Sampler(backend_options={"method": "statevector", "seed_simulator": 15}) result = sampler.run([qc] * 4, [[0, 0], [0, 0], [np.pi / 2, 0], [0, np.pi / 2]]).result() self.assertIsInstance(result, SamplerResult) self.assertEqual(len(result.quasi_dists), 4) @@ -140,7 +140,7 @@ def test_sampler_reverse_meas_order(self): qc.measure(1, 1) qc.measure(2, 0) - sampler = Sampler() + sampler = Sampler(backend_options={"method": "statevector"}) result = sampler.run( [qc, qc, qc, qc], [[0, 0], [0, 0], [np.pi / 2, 0], [0, np.pi / 2]], seed=15 ).result() diff --git a/test/terra/reference/ref_rotation.py b/test/terra/reference/ref_rotation.py new file mode 100644 index 0000000000..741dc89481 --- /dev/null +++ b/test/terra/reference/ref_rotation.py @@ -0,0 +1,256 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2018, 2019. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +""" +Test circuits and reference outputs for rotation gate instructions. 
+""" + +import numpy as np +from qiskit import QuantumRegister, ClassicalRegister, QuantumCircuit + + +# ========================================================================== +# RX-gate +# ========================================================================== + + +def rx_gate_circuits_deterministic(final_measure=True): + """X-gate test circuits with deterministic counts.""" + circuits = [] + qr = QuantumRegister(1) + if final_measure: + cr = ClassicalRegister(1) + regs = (qr, cr) + else: + regs = (qr,) + + # RX(pi/2) + circuit = QuantumCircuit(*regs) + circuit.rx(np.pi / 2, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RX(pi) = X + circuit = QuantumCircuit(*regs) + circuit.rx(np.pi, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RX(3*pi/2) + circuit = QuantumCircuit(*regs) + circuit.rx(3 * np.pi / 2, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RX(4*pi/2) = I + circuit = QuantumCircuit(*regs) + circuit.rx(4 * np.pi / 2, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + return circuits + + +def rx_gate_counts_deterministic(shots, hex_counts=True): + """RX-gate circuits reference counts.""" + targets = [] + if hex_counts: + # pi/2 + targets.append({"0x0": shots / 2, "0x1": shots / 2}) + # 2*pi/2 + targets.append({"0x1": shots}) + # 3*pi/2 + targets.append({"0x0": shots / 2, "0x1": shots / 2}) + # 4*pi/2 + targets.append({"0x0": shots}) + else: + # pi/2 + targets.append({"0": shots / 2, "1": shots / 2}) + # 2*pi/2 + targets.append({"1": shots}) + # 3*pi/2 + targets.append({"0": shots / 2, "1": shots / 2}) + # 4*pi/2 + targets.append({"0": shots}) + return targets + + +# ========================================================================== +# Z-gate +# ========================================================================== + + 
+def rz_gate_circuits_deterministic(final_measure=True): + """RZ-gate test circuits with deterministic counts.""" + circuits = [] + qr = QuantumRegister(1) + if final_measure: + cr = ClassicalRegister(1) + regs = (qr, cr) + else: + regs = (qr,) + + # RZ(pi/2) = S + circuit = QuantumCircuit(*regs) + circuit.h(qr) + circuit.barrier(qr) + circuit.rz(np.pi / 2, qr) + circuit.barrier(qr) + circuit.h(qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RZ(pi) = Z + circuit = QuantumCircuit(*regs) + circuit.h(qr) + circuit.barrier(qr) + circuit.rz(np.pi, qr) + circuit.barrier(qr) + circuit.h(qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RZ(3*pi/2) = Sdg + circuit = QuantumCircuit(*regs) + circuit.h(qr) + circuit.barrier(qr) + circuit.rz(3 * np.pi / 2, qr) + circuit.barrier(qr) + circuit.h(qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RZ(4*pi/2) = I + circuit = QuantumCircuit(*regs) + circuit.h(qr) + circuit.barrier(qr) + circuit.rz(4 * np.pi / 2, qr) + circuit.barrier(qr) + circuit.h(qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + return circuits + + +def rz_gate_counts_deterministic(shots, hex_counts=True): + """RZ-gate circuits reference counts.""" + targets = [] + if hex_counts: + # pi/2 = S + targets.append({"0x0": shots / 2, "0x1": shots / 2}) + # 2*pi/2 = Z + targets.append({"0x1": shots}) + # 3*pi/2 = Sdg + targets.append({"0x0": shots / 2, "0x1": shots / 2}) + # 4*pi/2 = I + targets.append({"0x0": shots}) + else: + # pi/2 = S + targets.append({"0": shots / 2, "1": shots / 2}) + # 2*pi/2 = Z + targets.append({"1": shots}) + # 3*pi/2 = Sdg + targets.append({"0": shots / 2, "1": shots / 2}) + # 4*pi/2 = I + targets.append({"0": shots}) + return targets + + +# ========================================================================== +# Y-gate +# 
========================================================================== + + +def ry_gate_circuits_deterministic(final_measure=True): + """RY-gate test circuits with deterministic counts.""" + circuits = [] + qr = QuantumRegister(1) + if final_measure: + cr = ClassicalRegister(1) + regs = (qr, cr) + else: + regs = (qr,) + + # RY(pi/2) + circuit = QuantumCircuit(*regs) + circuit.ry(np.pi / 2, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RY(pi) = Y + circuit = QuantumCircuit(*regs) + circuit.ry(np.pi, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RY(3*pi/2) + circuit = QuantumCircuit(*regs) + circuit.ry(3 * np.pi / 2, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + # RY(4*pi/2) = I + circuit = QuantumCircuit(*regs) + circuit.ry(4 * np.pi / 2, qr) + if final_measure: + circuit.barrier(qr) + circuit.measure(qr, cr) + circuits.append(circuit) + + return circuits + + +def ry_gate_counts_deterministic(shots, hex_counts=True): + """RY-gate circuits reference counts.""" + targets = [] + if hex_counts: + # pi/2 + targets.append({"0x0": shots / 2, "0x1": shots / 2}) + # 2*pi/2 + targets.append({"0x1": shots}) + # 3*pi/2 + targets.append({"0x0": shots / 2, "0x1": shots / 2}) + # 4*pi/2 + targets.append({"0x0": shots}) + else: + # pi/2 + targets.append({"0": shots / 2, "1": shots / 2}) + # 2*pi/2 + targets.append({"1": shots}) + # 3*pi/2 + targets.append({"0": shots / 2, "1": shots / 2}) + # 4*pi/2 + targets.append({"0": shots}) + return targets From 6ac975a6d9fdb474dea1f856da8672ca0269a5c0 Mon Sep 17 00:00:00 2001 From: Toshinari Itoko <15028342+itoko@users.noreply.github.com> Date: Tue, 26 Sep 2023 15:22:29 +0900 Subject: [PATCH 10/63] Fix incorrect excited state population when building noise model from BackendV2 (#1939) * Add test to fix 1937 * Fix incorrect computation of excited state population * 
Fix lint * Add reno --------- Co-authored-by: Jun Doi --- qiskit_aer/noise/device/models.py | 29 ++++++++++++------- ...xcitation-population-6af281a61f659dda.yaml | 6 ++++ test/terra/noise/test_device_models.py | 19 ++++++++++-- tox.ini | 1 + 4 files changed, 42 insertions(+), 13 deletions(-) create mode 100644 releasenotes/notes/fix-excitation-population-6af281a61f659dda.yaml diff --git a/qiskit_aer/noise/device/models.py b/qiskit_aer/noise/device/models.py index 4be3bf7283..adc5927bae 100644 --- a/qiskit_aer/noise/device/models.py +++ b/qiskit_aer/noise/device/models.py @@ -171,12 +171,15 @@ def basic_device_gate_errors( ) # Generate custom gate time dict + # Units used in the following computation: ns (time), Hz (frequency), mK (temperature). custom_times = {} relax_params = [] if thermal_relaxation: # If including thermal relaxation errors load - # T1, T2, and frequency values from properties + # T1 [ns], T2 [ns], and frequency [GHz] values from properties relax_params = thermal_relaxation_values(properties) + # Unit conversion: GHz -> Hz + relax_params = [(t1, t2, freq * 1e9) for t1, t2, freq in relax_params] # If we are specifying custom gate times include # them in the custom times dict if gate_lengths: @@ -207,7 +210,7 @@ def basic_device_gate_errors( # Get relaxation error if thermal_relaxation: relax_error = _device_thermal_relaxation_error( - qubits, relax_time, relax_params, temperature, thermal_relaxation + qubits, relax_time, relax_params, temperature ) # Get depolarizing error channel @@ -239,6 +242,8 @@ def _basic_device_target_gate_errors( Note that, in the resulting error list, non-Gate instructions (e.g. Reset) will have no gate errors while they may have thermal relaxation errors. Exceptionally, Measure instruction will have no errors, neither gate errors nor relaxation errors. 
+ + Note: Units in use: Time [s], Frequency [Hz], Temperature [mK] """ errors = [] for op_name, inst_prop_dic in target.items(): @@ -329,12 +334,14 @@ def _device_depolarizing_error(qubits, error_param, relax_error=None): return None -def _device_thermal_relaxation_error( - qubits, gate_time, relax_params, temperature, thermal_relaxation=True -): - """Construct a thermal_relaxation_error for device""" +def _device_thermal_relaxation_error(qubits, gate_time, relax_params, temperature): + """Construct a thermal_relaxation_error for device. + + Expected units: frequency in relax_params [Hz], temperature [mK]. + Note that gate_time and T1/T2 in relax_params must be in the same time unit. + """ # Check trivial case - if not thermal_relaxation or gate_time is None or gate_time == 0: + if gate_time is None or gate_time == 0: return None # Construct a tensor product of single qubit relaxation errors @@ -368,7 +375,7 @@ def _truncate_t2_value(t1, t2): def _excited_population(freq, temperature): - """Return excited state population from freq [GHz] and temperature [mK].""" + """Return excited state population from freq [Hz] and temperature [mK].""" if freq is None or temperature is None: return 0 population = 0 @@ -379,10 +386,10 @@ def _excited_population(freq, temperature): # Boltzman constant kB = 8.617333262e-5 (eV/K) # Planck constant h = 4.135667696e-15 (eV.s) # qubit temperature temperatue = T (mK) - # qubit frequency frequency = f (GHz) - # excited state population = 1/(1+exp((h*f*1e9)/(kb*T*1e-3))) + # qubit frequency frequency = f (Hz) + # excited state population = 1/(1+exp((h*f)/(kb*T*1e-3))) # See e.g. Phys. Rev. Lett. 114, 240501 (2015). 
- exp_param = exp((47.99243 * freq) / abs(temperature)) + exp_param = exp((47.99243 * 1e-9 * freq) / abs(temperature)) population = 1 / (1 + exp_param) if temperature < 0: # negative temperate implies |1> is thermal ground diff --git a/releasenotes/notes/fix-excitation-population-6af281a61f659dda.yaml b/releasenotes/notes/fix-excitation-population-6af281a61f659dda.yaml new file mode 100644 index 0000000000..874f9b3455 --- /dev/null +++ b/releasenotes/notes/fix-excitation-population-6af281a61f659dda.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixed a bug where :meth:`~.NoiseModel.from_backend` with ``BackendV2`` and non-zero ``temperature`` + produces relaxation noises with incorrect excitation population. + Fixed `#1937 `__. diff --git a/test/terra/noise/test_device_models.py b/test/terra/noise/test_device_models.py index c31b6090e7..0035f38bdb 100644 --- a/test/terra/noise/test_device_models.py +++ b/test/terra/noise/test_device_models.py @@ -13,12 +13,14 @@ """ Tests for utility functions to create device noise model. """ - +import numpy as np from test.terra.common import QiskitAerTestCase -from qiskit.providers import QubitProperties +from qiskit.circuit.library.standard_gates import XGate from qiskit.providers.fake_provider import FakeNairobi, FakeNairobiV2 +from qiskit.transpiler import Target, QubitProperties, InstructionProperties from qiskit_aer.noise.device.models import basic_device_gate_errors +from qiskit_aer.noise.errors.standard_errors import thermal_relaxation_error class TestDeviceNoiseModel(QiskitAerTestCase): @@ -70,3 +72,16 @@ def test_basic_device_gate_errors_from_target_with_no_t2_value(self): target = FakeNairobiV2().target target.qubit_properties[0].t2 = None basic_device_gate_errors(target=target) + + def test_non_zero_temperature(self): + """Test if non-zero excited_state_population is obtained when positive temperature is supplied. 
+ See https://github.com/Qiskit/qiskit-aer/issues/1937 for the details.""" + t1, t2, frequency, duration = 1e-4, 1e-4, 5e9, 5e-8 + target = Target(qubit_properties=[QubitProperties(t1=t1, t2=t2, frequency=frequency)]) + target.add_instruction(XGate(), {(0,): InstructionProperties(duration=duration)}) + errors = basic_device_gate_errors(target=target, gate_error=False, temperature=100) + _, _, x_error = errors[0] + no_excitation_error = thermal_relaxation_error(t1, t2, duration, excited_state_population=0) + x_error_matrix = x_error.to_quantumchannel().data + no_excitation_error_matrix = no_excitation_error.to_quantumchannel().data + self.assertFalse(np.allclose(x_error_matrix, no_excitation_error_matrix)) diff --git a/tox.ini b/tox.ini index 725e45bbc2..5b5a17c4ef 100644 --- a/tox.ini +++ b/tox.ini @@ -32,6 +32,7 @@ commands = [testenv:lint] envdir = .tox/lint basepython = python3 +allowlist_externals = sh commands = sh tools/clang-format.sh --Werror -n black --check {posargs} qiskit_aer test tools setup.py From 23488b9c242e2cc294210e88a8adb92ed23e73c4 Mon Sep 17 00:00:00 2001 From: Ikko Hamamura Date: Tue, 26 Sep 2023 22:26:45 +0900 Subject: [PATCH 11/63] Improve performance of sampler experiment (#1935) * Improve performance of sampler parameter-binds * lint * fix order * add reno * refactor * Update releasenotes/notes/sampler-performance-81e1649ec4657aad.yaml --------- Co-authored-by: Jun Doi --- qiskit_aer/primitives/sampler.py | 57 ++++++++++++++++--- .../sampler-performance-81e1649ec4657aad.yaml | 5 ++ 2 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/sampler-performance-81e1649ec4657aad.yaml diff --git a/qiskit_aer/primitives/sampler.py b/qiskit_aer/primitives/sampler.py index 286d7df0fe..fa988dfbd9 100644 --- a/qiskit_aer/primitives/sampler.py +++ b/qiskit_aer/primitives/sampler.py @@ -18,7 +18,8 @@ from collections.abc import Sequence -from qiskit.circuit import QuantumCircuit +import numpy as np +from qiskit.circuit 
import ParameterExpression, QuantumCircuit from qiskit.compiler import transpile from qiskit.exceptions import QiskitError from qiskit.primitives import BaseSampler, SamplerResult @@ -88,25 +89,30 @@ def _call( is_shots_none = "shots" in run_options and run_options["shots"] is None self._transpile(circuits, is_shots_none) - experiments = [] - parameter_binds = [] + experiment_manager = _ExperimentManager() for i, value in zip(circuits, parameter_values): if len(value) != len(self._parameters[i]): raise QiskitError( f"The number of values ({len(value)}) does not match " f"the number of parameters ({len(self._parameters[i])})." ) - parameter_binds.append({k: [v] for k, v in zip(self._parameters[i], value)}) - experiments.append(self._transpiled_circuits[(i, is_shots_none)]) + + experiment_manager.append( + key=i, + parameter_bind=dict(zip(self._parameters[i], value)), + experiment_circuit=self._transpiled_circuits[(i, is_shots_none)], + ) result = self._backend.run( - experiments, parameter_binds=parameter_binds, **run_options + experiment_manager.experiment_circuits, + parameter_binds=experiment_manager.parameter_binds, + **run_options, ).result() # Postprocessing metadata = [] quasis = [] - for i in range(len(experiments)): + for i in experiment_manager.experiment_indices: if is_shots_none: probabilities = result.data(i)["probabilities"] num_qubits = result.results[i].metadata["num_qubits"] @@ -186,3 +192,40 @@ def _transpile(self, circuit_indices: Sequence[int], is_shots_none: bool): ) for i, circuit in zip(to_handle, circuits): self._transpiled_circuits[(i, is_shots_none)] = circuit + + +class _ExperimentManager: + def __init__(self): + self.keys: list[int] = [] + self.experiment_circuits: list[QuantumCircuit] = [] + self.parameter_binds: list[dict[ParameterExpression, list[float]]] = [] + self._input_indices: list[list[int]] = [] + self._num_experiment: int = 0 + + def __len__(self): + return self._num_experiment + + @property + def experiment_indices(self): + 
"""indices of experiments""" + return np.argsort(sum(self._input_indices, [])).tolist() + + def append( + self, + key: tuple[int, int], + parameter_bind: dict[ParameterExpression, float], + experiment_circuit: QuantumCircuit, + ): + """append experiments""" + if parameter_bind and key in self.keys: + key_index = self.keys.index(key) + for k, vs in self.parameter_binds[key_index].items(): + vs.append(parameter_bind[k]) + self._input_indices[key_index].append(self._num_experiment) + else: + self.experiment_circuits.append(experiment_circuit) + self.keys.append(key) + self.parameter_binds.append({k: [v] for k, v in parameter_bind.items()}) + self._input_indices.append([self._num_experiment]) + + self._num_experiment += 1 diff --git a/releasenotes/notes/sampler-performance-81e1649ec4657aad.yaml b/releasenotes/notes/sampler-performance-81e1649ec4657aad.yaml new file mode 100644 index 0000000000..5b8e1f2778 --- /dev/null +++ b/releasenotes/notes/sampler-performance-81e1649ec4657aad.yaml @@ -0,0 +1,5 @@ +--- +upgrade: + - | + Improved performance when the same circuits and multiple parameters are passed to + :class:`~.Sampler`. 
From b83abe26567af6e585a05c55cde0a7c69e2402f5 Mon Sep 17 00:00:00 2001 From: Ikko Hamamura Date: Wed, 27 Sep 2023 10:05:20 +0900 Subject: [PATCH 12/63] Fix the order bug in Estimator (#1936) * Fix the order bug * add reno --------- Co-authored-by: Jun Doi --- qiskit_aer/primitives/estimator.py | 30 ++++++++----------- .../estimator-order-bug-a341d82075f47046.yaml | 5 ++++ test/terra/primitives/test_estimator.py | 17 ++++++++++- 3 files changed, 33 insertions(+), 19 deletions(-) create mode 100644 releasenotes/notes/estimator-order-bug-a341d82075f47046.yaml diff --git a/qiskit_aer/primitives/estimator.py b/qiskit_aer/primitives/estimator.py index 6e34dda648..65d87231c5 100644 --- a/qiskit_aer/primitives/estimator.py +++ b/qiskit_aer/primitives/estimator.py @@ -382,20 +382,16 @@ def _compute_with_approximation( self._transpile_circuits(circuits) experiment_manager = _ExperimentManager() for i, j, value in zip(circuits, observables, parameter_values): + self._validate_parameter_length(value, i) if (i, j) in experiment_manager.keys: - self._validate_parameter_length(value, i) - experiment_manager.append( - key=(i, j), - parameter_bind=dict(zip(self._parameters[i], value)), - ) + key_index = experiment_manager.keys.index((i, j)) + circuit = experiment_manager.experiment_circuits[key_index] else: - self._validate_parameter_length(value, i) circuit = ( self._circuits[i].copy() if self._skip_transpilation else self._transpiled_circuits[i].copy() ) - observable = self._observables[j] if shots is None: circuit.save_expectation_value(observable, self._layouts[i]) @@ -404,11 +400,11 @@ def _compute_with_approximation( circuit.save_expectation_value( pauli, self._layouts[i], label=str(term_ind) ) - experiment_manager.append( - key=(i, j), - parameter_bind=dict(zip(self._parameters[i], value)), - experiment_circuit=circuit, - ) + experiment_manager.append( + key=(i, j), + parameter_bind=dict(zip(self._parameters[i], value)), + experiment_circuit=circuit, + ) self._cache[key] = 
experiment_manager result = self._backend.run( @@ -616,24 +612,22 @@ def __len__(self): @property def experiment_indices(self): """indices of experiments""" - return sum(self._input_indices, []) + return np.argsort(sum(self._input_indices, [])).tolist() def append( self, key: tuple[int, int], parameter_bind: dict[ParameterExpression, float], - experiment_circuit: QuantumCircuit | None = None, + experiment_circuit: QuantumCircuit, ): """append experiments""" - if experiment_circuit is not None: - self.experiment_circuits.append(experiment_circuit) - - if key in self.keys: + if key in self.keys and parameter_bind: key_index = self.keys.index(key) for k, vs in self.parameter_binds[key_index].items(): vs.append(parameter_bind[k]) self._input_indices[key_index].append(self._num_experiment) else: + self.experiment_circuits.append(experiment_circuit) self.keys.append(key) self.parameter_binds.append({k: [v] for k, v in parameter_bind.items()}) self._input_indices.append([self._num_experiment]) diff --git a/releasenotes/notes/estimator-order-bug-a341d82075f47046.yaml b/releasenotes/notes/estimator-order-bug-a341d82075f47046.yaml new file mode 100644 index 0000000000..ffd8d90f6b --- /dev/null +++ b/releasenotes/notes/estimator-order-bug-a341d82075f47046.yaml @@ -0,0 +1,5 @@ +--- +upgrade: + - | + Fixed a bug that caused results to be incorrectly ordered or errors in + :class:`~.Estimator` with ``approximation=True``. 
diff --git a/test/terra/primitives/test_estimator.py b/test/terra/primitives/test_estimator.py index bcbea676fc..96c13cf59b 100644 --- a/test/terra/primitives/test_estimator.py +++ b/test/terra/primitives/test_estimator.py @@ -20,7 +20,7 @@ import numpy as np from ddt import data, ddt -from qiskit.circuit import QuantumCircuit +from qiskit.circuit import Parameter, QuantumCircuit from qiskit.circuit.library import RealAmplitudes from qiskit.exceptions import QiskitError from qiskit.opflow import PauliSumOp @@ -308,6 +308,21 @@ def test_warn_shots_none_without_approximation(self): np.testing.assert_allclose(result.values, [-1.313831587508902]) self.assertIsInstance(result.metadata[0]["variance"], float) + def test_result_order(self): + """Test to validate the order.""" + qc1 = QuantumCircuit(1) + qc1.measure_all() + + param = Parameter("a") + qc2 = QuantumCircuit(1) + qc2.ry(np.pi / 2 * param, 0) + qc2.measure_all() + + estimator = Estimator(approximation=True) + job = estimator.run([qc1, qc2, qc1, qc1, qc2], ["Z"] * 5, [[], [1], [], [], [1]]) + result = job.result() + np.testing.assert_allclose(result.values, [1, 0, 1, 1, 0], atol=1e-10) + if __name__ == "__main__": unittest.main() From 1a5af5975c4a23bd523724f28ddc81db9db1aa51 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Thu, 28 Sep 2023 17:05:31 +0900 Subject: [PATCH 13/63] Fix nested parallel performance (#1949) --- ...p_nested_performance-a3d55f3e85366a5b.yaml | 7 +++++ src/controllers/aer_controller.hpp | 2 +- src/simulators/statevector/qubitvector.hpp | 9 +++--- src/simulators/unitary/unitarymatrix.hpp | 30 +++++++++---------- src/transpile/fusion.hpp | 20 +++++++++---- 5 files changed, 42 insertions(+), 26 deletions(-) create mode 100644 releasenotes/notes/fix_omp_nested_performance-a3d55f3e85366a5b.yaml diff --git a/releasenotes/notes/fix_omp_nested_performance-a3d55f3e85366a5b.yaml b/releasenotes/notes/fix_omp_nested_performance-a3d55f3e85366a5b.yaml new file mode 100644 index 0000000000..50a19f6be9 --- 
/dev/null +++ b/releasenotes/notes/fix_omp_nested_performance-a3d55f3e85366a5b.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + OpenMP nested parallel simulation for parallel experiments + parallel state + update was very slow because gate fusion uses unitary simulator inside + and it used omp parallel region. This fix removes parallel region in + gate fusion and improves performance of nested parallel simulations diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index f42ae64ef8..e6005b9a62 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -541,7 +541,7 @@ Result Controller::execute(std::vector> &circuits, // nested should be set to zero if num_threads clause will be used #if _OPENMP >= 200805 - omp_set_max_active_levels(2); + omp_set_max_active_levels(1); #else omp_set_nested(1); #endif diff --git a/src/simulators/statevector/qubitvector.hpp b/src/simulators/statevector/qubitvector.hpp index 3cc84d8a79..a686899358 100755 --- a/src/simulators/statevector/qubitvector.hpp +++ b/src/simulators/statevector/qubitvector.hpp @@ -890,11 +890,10 @@ template void QubitVector::zero() { const int_t END = data_size_; // end for k loop -#pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ - num_threads(omp_threads_) - for (int_t k = 0; k < END; ++k) { - data_[k] = 0.0; - } + auto zero_proc = [this](int_t i) { data_[i] = 0.0; }; + Utils::apply_omp_parallel_for( + (num_qubits_ > omp_threshold_ && omp_threads_ > 1), 0, END, zero_proc, + omp_threads_); } template diff --git a/src/simulators/unitary/unitarymatrix.hpp b/src/simulators/unitary/unitarymatrix.hpp index 494d57e84e..f406091662 100644 --- a/src/simulators/unitary/unitarymatrix.hpp +++ b/src/simulators/unitary/unitarymatrix.hpp @@ -238,13 +238,13 @@ void UnitaryMatrix::initialize() { BaseVector::zero(); // Set to be identity matrix const int_t nrows = rows_; // end for k loop -#pragma omp parallel if (BaseVector::num_qubits_ > \ 
- BaseVector::omp_threshold_ && \ - BaseVector::omp_threads_ > 1) \ - num_threads(BaseVector::omp_threads_) - for (int_t k = 0; k < nrows; ++k) { - BaseVector::data_[k * (nrows + 1)] = 1.0; - } + auto initialize_proc = [this](int_t i) { + BaseVector::data_[i * (rows_ + 1)] = 1.0; + }; + Utils::apply_omp_parallel_for( + (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && + BaseVector::omp_threads_ > 1), + 0, rows_, initialize_proc, BaseVector::omp_threads_); } template @@ -260,15 +260,15 @@ void UnitaryMatrix::initialize_from_matrix( std::to_string(mat.GetRows()) + "," + std::to_string(mat.GetColumns()) + ")."); } - -#pragma omp parallel if (BaseVector::num_qubits_ > \ - BaseVector::omp_threshold_ && \ - BaseVector::omp_threads_ > 1) \ - num_threads(BaseVector::omp_threads_) - for (int_t row = 0; row < nrows; ++row) - for (int_t col = 0; col < nrows; ++col) { - BaseVector::data_[row + nrows * col] = mat(row, col); + auto initialize_proc = [this, &mat](int_t row) { + for (int_t col = 0; col < rows_; ++col) { + BaseVector::data_[row + rows_ * col] = mat(row, col); } + }; + Utils::apply_omp_parallel_for( + (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && + BaseVector::omp_threads_ > 1), + 0, rows_, initialize_proc, BaseVector::omp_threads_); } template diff --git a/src/transpile/fusion.hpp b/src/transpile/fusion.hpp index d7c14ec8b6..a3a1c8b59d 100644 --- a/src/transpile/fusion.hpp +++ b/src/transpile/fusion.hpp @@ -851,11 +851,21 @@ void Fusion::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, if (circ.ops.size() % parallelization_) ++unit; -#pragma omp parallel for if (parallelization_ > 1) num_threads(parallelization_) - for (int_t i = 0; i < parallelization_; i++) { - int_t start = unit * i; - int_t end = std::min(start + unit, (int_t)circ.ops.size()); - optimize_circuit(circ, noise, allowed_opset, start, end, fuser, method); + if (parallelization_ > 1) { +#pragma omp parallel for num_threads(parallelization_) + for (int_t i = 0; i < 
parallelization_; i++) { + int_t start = unit * i; + int_t end = std::min(start + unit, (int_t)circ.ops.size()); + optimize_circuit(circ, noise, allowed_opset, start, end, fuser, + method); + } + } else { + for (int_t i = 0; i < parallelization_; i++) { + int_t start = unit * i; + int_t end = std::min(start + unit, (int_t)circ.ops.size()); + optimize_circuit(circ, noise, allowed_opset, start, end, fuser, + method); + } } result.metadata.add(parallelization_, "fusion", "parallelization"); } From fffb5de1b764825159b3e3089e691059fe48d73d Mon Sep 17 00:00:00 2001 From: Luciano Bello Date: Mon, 2 Oct 2023 04:01:34 +0200 Subject: [PATCH 14/63] move travis file to gha in the README (#1943) * change the badge from travis to gha * add build --------- Co-authored-by: Jun Doi --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b3be611454..a9c4795953 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # Qiskit Aer -[![License](https://img.shields.io/github/license/Qiskit/qiskit-aer.svg?style=popout-square)](https://opensource.org/licenses/Apache-2.0)[![Build Status](https://img.shields.io/travis/com/Qiskit/qiskit-aer/master.svg?style=popout-square)](https://travis-ci.com/Qiskit/qiskit-aer)[![](https://img.shields.io/github/release/Qiskit/qiskit-aer.svg?style=popout-square)](https://github.com/Qiskit/qiskit-aer/releases)[![](https://img.shields.io/pypi/dm/qiskit-aer.svg?style=popout-square)](https://pypi.org/project/qiskit-aer/) +[![License](https://img.shields.io/github/license/Qiskit/qiskit-aer.svg?style=popout-square)](https://opensource.org/licenses/Apache-2.0) +[![Build](https://github.com/Qiskit/qiskit-aer/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/Qiskit/qiskit-aer/actions/workflows/build.yml) +[![Tests](https://github.com/Qiskit/qiskit-aer/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/Qiskit/qiskit-aer/actions/workflows/tests.yml) 
+[![](https://img.shields.io/github/release/Qiskit/qiskit-aer.svg?style=popout-square)](https://github.com/Qiskit/qiskit-aer/releases) +[![](https://img.shields.io/pypi/dm/qiskit-aer.svg?style=popout-square)](https://pypi.org/project/qiskit-aer/) **Qiskit** is an open-source framework for working with noisy quantum computers at the level of pulses, circuits, and algorithms. From 8f825d7bfa540c9cd71282e62a93e4ee7f9b54e5 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Mon, 2 Oct 2023 14:26:35 +0900 Subject: [PATCH 15/63] fix matplotlib version (#1951) --- .github/workflows/docs.yml | 2 +- .../notes/fix_matplotlib_dependency-5b8ba26da6a98e3a.yaml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/fix_matplotlib_dependency-5b8ba26da6a98e3a.yaml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index da877479ac..28b533ac6c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -74,7 +74,7 @@ jobs: pip install -U -r requirements-dev.txt -c constraints.txt pip install -c constraints.txt git+https://github.com/Qiskit/qiskit-terra pip install -c constraints.txt . 
- pip install -U "qiskit-ibmq-provider" "z3-solver" "qiskit-ignis" "qiskit-aqua" "pyscf<1.7.4" "matplotlib<3.3.0" jupyter pylatexenc nbsphinx cvxpy qiskit-sphinx-theme -c constraints.txt + pip install -U "qiskit-ibmq-provider" "z3-solver" "qiskit-ignis" "qiskit-aqua" "pyscf<1.7.4" "matplotlib>=3.3.0" jupyter pylatexenc nbsphinx cvxpy qiskit-sphinx-theme -c constraints.txt sudo apt install -y graphviz pandoc libopenblas-dev pip check shell: bash diff --git a/releasenotes/notes/fix_matplotlib_dependency-5b8ba26da6a98e3a.yaml b/releasenotes/notes/fix_matplotlib_dependency-5b8ba26da6a98e3a.yaml new file mode 100644 index 0000000000..01433b8ee3 --- /dev/null +++ b/releasenotes/notes/fix_matplotlib_dependency-5b8ba26da6a98e3a.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Fix the version of matplotlib required by seaborn From 73f0847af22fd1734ee6dbbc08800f72af013aa6 Mon Sep 17 00:00:00 2001 From: Julien Gacon Date: Mon, 2 Oct 2023 08:37:52 +0200 Subject: [PATCH 16/63] Fix usage of QuantumCircuit.id (#1945) Co-authored-by: Jun Doi --- qiskit_aer/backends/backend_utils.py | 4 ++-- test/terra/backends/simulator_test_case.py | 2 +- test/terra/decorators.py | 4 ---- test/terra/noise/test_noise_model.py | 2 +- test/terra/noise/test_quantum_error.py | 2 +- test/terra/reference/ref_algorithms.py | 6 +----- test/terra/reference/ref_kraus_noise.py | 6 +----- test/terra/reference/ref_measure.py | 22 +++++++++++----------- test/terra/reference/ref_pauli_noise.py | 12 ++++-------- test/terra/reference/ref_reset_noise.py | 8 ++------ test/terra/reference/ref_save_expval.py | 2 +- tools/generate_qobj.py | 2 +- tools/verify_wheels.py | 6 +----- 13 files changed, 27 insertions(+), 51 deletions(-) diff --git a/qiskit_aer/backends/backend_utils.py b/qiskit_aer/backends/backend_utils.py index 8495ac8869..1e41c38f56 100644 --- a/qiskit_aer/backends/backend_utils.py +++ b/qiskit_aer/backends/backend_utils.py @@ -443,7 +443,7 @@ def available_methods(controller, methods, devices): """Check 
available simulation methods by running a dummy circuit.""" # Test methods are available using the controller dummy_circ = QuantumCircuit(1) - dummy_circ.i(0) + dummy_circ.id(0) valid_methods = [] for device in devices: @@ -462,7 +462,7 @@ def available_devices(controller, devices): """Check available simulation devices by running a dummy circuit.""" # Test methods are available using the controller dummy_circ = QuantumCircuit(1) - dummy_circ.i(0) + dummy_circ.id(0) valid_devices = [] for device in devices: diff --git a/test/terra/backends/simulator_test_case.py b/test/terra/backends/simulator_test_case.py index 1e3c99b145..2173c2c413 100644 --- a/test/terra/backends/simulator_test_case.py +++ b/test/terra/backends/simulator_test_case.py @@ -116,7 +116,7 @@ def check_cuStateVec(devices): """Return if the system supports cuStateVec or not""" if "GPU" in devices: dummy_circ = QuantumCircuit(1) - dummy_circ.i(0) + dummy_circ.id(0) qobj = assemble( dummy_circ, optimization_level=0, diff --git a/test/terra/decorators.py b/test/terra/decorators.py index e542add595..c5d19a1426 100644 --- a/test/terra/decorators.py +++ b/test/terra/decorators.py @@ -21,10 +21,6 @@ from qiskit_aer import AerProvider, AerSimulator -# Backwards compatibility for Terra <= 0.13 -if not hasattr(QuantumCircuit, "i"): - QuantumCircuit.i = QuantumCircuit.iden - def is_method_available(backend, method): """Check if input method is available for the qasm simulator.""" diff --git a/test/terra/noise/test_noise_model.py b/test/terra/noise/test_noise_model.py index d4f5004fc6..a84c080567 100644 --- a/test/terra/noise/test_noise_model.py +++ b/test/terra/noise/test_noise_model.py @@ -59,7 +59,7 @@ def test_amplitude_damping_error(self): for _ in range(30): # Add noisy identities circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) shots = 4000 diff --git a/test/terra/noise/test_quantum_error.py b/test/terra/noise/test_quantum_error.py index 
79cc49bb18..f4d821ec02 100644 --- a/test/terra/noise/test_quantum_error.py +++ b/test/terra/noise/test_quantum_error.py @@ -143,7 +143,7 @@ def test_ideal(self): # up to global phase qc = QuantumCircuit(1, global_phase=0.5) - qc.i(0) + qc.id(0) self.assertTrue(QuantumError(qc).ideal()) self.assertTrue(QuantumError(UnitaryGate(-1.0 * np.eye(2))).ideal()) diff --git a/test/terra/reference/ref_algorithms.py b/test/terra/reference/ref_algorithms.py index 385c81f5f0..d0fd75b112 100644 --- a/test/terra/reference/ref_algorithms.py +++ b/test/terra/reference/ref_algorithms.py @@ -17,10 +17,6 @@ from qiskit import QuantumRegister, ClassicalRegister, QuantumCircuit -# Backwards compatibility for Terra <= 0.13 -if not hasattr(QuantumCircuit, "i"): - QuantumCircuit.i = QuantumCircuit.iden - def grovers_circuit(final_measure=True, allow_sampling=True): """Testing a circuit originated in the Grover algorithm""" @@ -66,7 +62,7 @@ def grovers_circuit(final_measure=True, allow_sampling=True): circuit.measure(qr[1], cr[1]) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuits.append(circuit) return circuits diff --git a/test/terra/reference/ref_kraus_noise.py b/test/terra/reference/ref_kraus_noise.py index 59cdbc53b3..66b67a25d4 100644 --- a/test/terra/reference/ref_kraus_noise.py +++ b/test/terra/reference/ref_kraus_noise.py @@ -20,10 +20,6 @@ from qiskit_aer.noise import NoiseModel from qiskit_aer.noise.errors.standard_errors import amplitude_damping_error -# Backwards compatibility for Terra <= 0.13 -if not hasattr(QuantumCircuit, "i"): - QuantumCircuit.i = QuantumCircuit.iden - # ========================================================================== # Amplitude damping error @@ -42,7 +38,7 @@ def kraus_gate_error_circuits(): for _ in range(30): # Add noisy identities circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) diff --git a/test/terra/reference/ref_measure.py 
b/test/terra/reference/ref_measure.py index 8db104dfd2..a570600e85 100644 --- a/test/terra/reference/ref_measure.py +++ b/test/terra/reference/ref_measure.py @@ -37,7 +37,7 @@ def measure_circuits_deterministic(allow_sampling=True): circuit.measure(qr, cr) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) @@ -49,7 +49,7 @@ def measure_circuits_deterministic(allow_sampling=True): circuit.measure(qr, cr) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) @@ -61,7 +61,7 @@ def measure_circuits_deterministic(allow_sampling=True): circuit.measure(qr, cr) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) @@ -73,7 +73,7 @@ def measure_circuits_deterministic(allow_sampling=True): circuit.measure(qr, cr) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) @@ -89,7 +89,7 @@ def measure_circuits_deterministic(allow_sampling=True): circuit.measure(1, 0) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(1, 0) circuits.append(circuit) @@ -185,7 +185,7 @@ def measure_circuits_nondeterministic(allow_sampling=True): circuit.measure(qr, cr) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) @@ -228,7 +228,7 @@ def measure_n(num_qubits): circuit.append(measure_n(2), [0, 1], [0, 1]) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.append(measure_n(2), [0, 1], [0, 1]) circuits.append(circuit) @@ -243,7 +243,7 @@ def measure_n(num_qubits): circuit.append(measure_n(3), [0, 1, 2], [0, 1, 2]) if not 
allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.append(measure_n(3), [0, 1, 2], [0, 1, 2]) circuits.append(circuit) @@ -258,7 +258,7 @@ def measure_n(num_qubits): circuit.append(measure_n(4), [0, 1, 2, 3], [0, 1, 2, 3]) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.append(measure_n(4), [0, 1, 2, 3], [0, 1, 2, 3]) circuits.append(circuit) @@ -344,7 +344,7 @@ def measure_n(num_qubits): circuit.append(measure_n(2), [0, 1], [0, 1]) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.append(measure_n(2), [0, 1], [0, 1]) circuits.append(circuit) @@ -359,7 +359,7 @@ def measure_n(num_qubits): circuit.append(measure_n(3), [0, 1, 2], [0, 1, 2]) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.append(measure_n(3), [0, 1, 2], [0, 1, 2]) circuits.append(circuit) diff --git a/test/terra/reference/ref_pauli_noise.py b/test/terra/reference/ref_pauli_noise.py index 8646c82f0b..c43bddfe09 100644 --- a/test/terra/reference/ref_pauli_noise.py +++ b/test/terra/reference/ref_pauli_noise.py @@ -20,10 +20,6 @@ from qiskit_aer.noise import NoiseModel from qiskit_aer.noise.errors.standard_errors import pauli_error -# Backwards compatibility for Terra <= 0.13 -if not hasattr(QuantumCircuit, "i"): - QuantumCircuit.i = QuantumCircuit.iden - # ========================================================================== # Pauli Gate Errors @@ -39,28 +35,28 @@ def pauli_gate_error_circuits(): # 100% all-qubit Pauli error on "id" gate circuit = QuantumCircuit(qr, cr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) # 25% all-qubit Pauli error on "id" gates circuit = QuantumCircuit(qr, cr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) # 100% Pauli error on "id" gates on qubit-1 
circuit = QuantumCircuit(qr, cr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) # 25% all-qubit Pauli error on "id" gates on qubit-0 circuit = QuantumCircuit(qr, cr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) diff --git a/test/terra/reference/ref_reset_noise.py b/test/terra/reference/ref_reset_noise.py index 73d917ece1..2eb6674c84 100644 --- a/test/terra/reference/ref_reset_noise.py +++ b/test/terra/reference/ref_reset_noise.py @@ -20,10 +20,6 @@ from qiskit_aer.noise import NoiseModel from qiskit_aer.noise.errors.standard_errors import reset_error -# Backwards compatibility for Terra <= 0.13 -if not hasattr(QuantumCircuit, "i"): - QuantumCircuit.i = QuantumCircuit.iden - # ========================================================================== # Reset Gate Errors @@ -65,7 +61,7 @@ def reset_gate_error_circuits(): qr = QuantumRegister(1, "qr") cr = ClassicalRegister(1, "cr") circuit = QuantumCircuit(qr, cr) - circuit.i(qr) + circuit.id(qr) circuit.barrier(qr) circuit.measure(qr, cr) circuits.append(circuit) @@ -74,7 +70,7 @@ def reset_gate_error_circuits(): qr = QuantumRegister(2, "qr") cr = ClassicalRegister(2, "cr") circuit = QuantumCircuit(qr, cr) - circuit.i(qr[0]) + circuit.id(qr[0]) circuit.x(qr[1]) circuit.barrier(qr) circuit.measure(qr, cr) diff --git a/test/terra/reference/ref_save_expval.py b/test/terra/reference/ref_save_expval.py index d272d51ca4..392ac19f55 100644 --- a/test/terra/reference/ref_save_expval.py +++ b/test/terra/reference/ref_save_expval.py @@ -275,7 +275,7 @@ def save_expval_circuit_parameterized( circuit.u(0, 0, 0, 1) circuit.cu(0, 0, 0, 0, 0, 1) circuit.u(0, 0, 0, 1) - circuit.i(0) + circuit.id(0) if snapshot: for label, (params, qubits) in save_expval_params(pauli=True).items(): circuit.save_expectation_value( diff --git a/tools/generate_qobj.py b/tools/generate_qobj.py index 23b0c4ac26..45006046c5 100755 --- 
a/tools/generate_qobj.py +++ b/tools/generate_qobj.py @@ -58,7 +58,7 @@ def grovers_circuit(final_measure=True, allow_sampling=True): circuit.measure(qr[1], cr[1]) if not allow_sampling: circuit.barrier(qr) - circuit.iden(qr) + circuit.id(qr) circuits.append(circuit) return circuits diff --git a/tools/verify_wheels.py b/tools/verify_wheels.py index 5b7e461463..1de976ddb9 100644 --- a/tools/verify_wheels.py +++ b/tools/verify_wheels.py @@ -20,10 +20,6 @@ from qiskit_aer import StatevectorSimulator from qiskit_aer import UnitarySimulator -# Backwards compatibility for Terra <= 0.13 -if not hasattr(QuantumCircuit, "i"): - QuantumCircuit.i = QuantumCircuit.iden - def assertAlmostEqual(first, second, places=None, msg=None, delta=None): """Test of 2 object are almost equal. @@ -105,7 +101,7 @@ def grovers_circuit(final_measure=True, allow_sampling=True): circuit.measure(qr[1], cr[1]) if not allow_sampling: circuit.barrier(qr) - circuit.i(qr) + circuit.id(qr) circuits.append(circuit) return circuits From e1332f862ec147d75f56f22136bd50371ff4be87 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Fri, 6 Oct 2023 11:24:48 +0900 Subject: [PATCH 17/63] Implementing runtime parameter binding (#1901) Optimizes GPU simulation for single circuit with multiple parameters by binding parameters to each gates at runtime on a single circuit with multiple shots of simulations. This feature is enabled by a new option ``runtime_parameter_bind_enable=True`` (Default is ``False``). 
* Implementing runtime parameter binding * remove old files * fix seg fault caused by global phase for parameters * delete duplicate max_matrix_qubits * Correct metadata for runtime param bind configs and move time_taken to metadata so that we can read time info from primitives * performance improvement of sampling measure for runtime parameter binding * fix error for MPI * Improve batched sampling measure * format * fix OpenMP nested parallel * reflecting review comments * fix lint * fix lint --- CMakeLists.txt | 1 + qiskit_aer/backends/aer_compiler.py | 1 + qiskit_aer/backends/aer_simulator.py | 8 + .../wrappers/aer_controller_binding.hpp | 14 +- .../notes/add_executor-ba4870f86ed5d8ec.yaml | 30 + ...me_parameter_binding-d2c57255f02729a1.yaml | 8 + src/controllers/aer_controller.hpp | 80 +- src/controllers/controller_execute.hpp | 118 ++- src/framework/circuit.hpp | 5 + src/framework/config.hpp | 9 + src/framework/operations.hpp | 27 + src/framework/results/data/metadata.hpp | 9 + .../results/data/subtypes/data_map.hpp | 38 + src/noise/noise_model.hpp | 2 + src/simulators/batch_shots_executor.hpp | 595 ++++++++++--- src/simulators/circuit_executor.hpp | 461 +++++++--- .../density_matrix/densitymatrix_executor.hpp | 195 +++- .../density_matrix/densitymatrix_state.hpp | 13 +- src/simulators/multi_state_executor.hpp | 444 +++++++--- src/simulators/parallel_state_executor.hpp | 294 +++--- src/simulators/shot_branching.hpp | 126 ++- src/simulators/state.hpp | 4 + src/simulators/statevector/chunk/chunk.hpp | 48 +- .../statevector/chunk/chunk_container.hpp | 132 ++- .../statevector/chunk/chunk_manager.hpp | 67 +- .../chunk/cuStateVec_chunk_container.hpp | 7 +- .../statevector/chunk/cuda_kernels.hpp | 9 +- .../chunk/device_chunk_container.hpp | 68 +- .../chunk/host_chunk_container.hpp | 6 +- .../statevector/chunk/thrust_kernels.hpp | 381 +++++++- src/simulators/statevector/indexes.hpp | 71 +- src/simulators/statevector/qubitvector.hpp | 33 +- 
.../statevector/qubitvector_thrust.hpp | 146 ++- .../statevector/statevector_executor.hpp | 214 +++-- .../statevector/statevector_state.hpp | 14 +- .../tensor_network/tensor_net_executor.hpp | 147 ++- .../tensor_network/tensor_net_state.hpp | 7 +- src/simulators/unitary/unitary_executor.hpp | 12 +- src/simulators/unitary/unitary_state.hpp | 6 +- src/transpile/batch_converter.hpp | 247 ++++++ src/transpile/fusion.hpp | 62 +- src/transpile/parameter2matrix.hpp | 215 +++++ .../backends/test_runtime_parameterization.py | 838 ++++++++++++++++++ 43 files changed, 4389 insertions(+), 823 deletions(-) create mode 100644 releasenotes/notes/add_executor-ba4870f86ed5d8ec.yaml create mode 100644 releasenotes/notes/runtime_parameter_binding-d2c57255f02729a1.yaml create mode 100644 src/transpile/batch_converter.hpp create mode 100644 src/transpile/parameter2matrix.hpp create mode 100644 test/terra/backends/test_runtime_parameterization.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ac3951fc7..517ce982e7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -512,6 +512,7 @@ endif() if(AER_DEBUG) set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_DEBUG) + set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -g") endif() if(TEST_JSON) diff --git a/qiskit_aer/backends/aer_compiler.py b/qiskit_aer/backends/aer_compiler.py index 4909f73537..e4a3a4e9b6 100644 --- a/qiskit_aer/backends/aer_compiler.py +++ b/qiskit_aer/backends/aer_compiler.py @@ -491,6 +491,7 @@ def compile_circuit(circuits, basis_gates=None, optypes=None): "parameterizations": (list), "fusion_parallelization_threshold": (int, np.integer), "target_gpus": (list), + "runtime_parameter_bind_enable": (bool, np.bool_), } diff --git a/qiskit_aer/backends/aer_simulator.py b/qiskit_aer/backends/aer_simulator.py index d34cf1ef27..f845ecd6f0 100644 --- a/qiskit_aer/backends/aer_simulator.py +++ b/qiskit_aer/backends/aer_simulator.py @@ -318,6 +318,12 @@ class AerSimulator(AerBackend): * ``accept_distributed_results`` 
(bool): This option enables storing results independently in each process (Default: None). + * ``runtime_parameter_bind_enable`` (bool): If this option is True + parameters are bound at runtime by using multi-shots without constructing + circuits for each parameters. For GPU this option can be used with + ``batched_shots_gpu`` to run with multiple parameters in a batch. + (Default: False). + These backend options only apply when using the ``"statevector"`` simulation method: @@ -765,6 +771,8 @@ def _default_options(cls): # tensor network options tensor_network_num_sampling_qubits=10, use_cuTensorNet_autotuning=False, + # parameter binding + runtime_parameter_bind_enable=False, ) def __repr__(self): diff --git a/qiskit_aer/backends/wrappers/aer_controller_binding.hpp b/qiskit_aer/backends/wrappers/aer_controller_binding.hpp index 67e057c74f..f614e4483d 100644 --- a/qiskit_aer/backends/wrappers/aer_controller_binding.hpp +++ b/qiskit_aer/backends/wrappers/aer_controller_binding.hpp @@ -412,6 +412,14 @@ void bind_aer_controller(MODULE m) { "target_gpus", [](const Config &config) { return config.target_gpus.val; }, [](Config &config, reg_t val) { config.target_gpus.value(val); }); + aer_config.def_property( + "runtime_parameter_bind_enable", + [](const Config &config) { + return config.runtime_parameter_bind_enable.val; + }, + [](Config &config, bool val) { + config.runtime_parameter_bind_enable.value(val); + }); aer_config.def(py::pickle( [](const AER::Config &config) { @@ -500,11 +508,12 @@ void bind_aer_controller(MODULE m) { 79, config.extended_stabilizer_norm_estimation_default_samples), write_value(80, config.shot_branching_enable), write_value(81, config.shot_branching_sampling_enable), - write_value(82, config.target_gpus)); + write_value(82, config.target_gpus), + write_value(83, config.runtime_parameter_bind_enable)); }, [](py::tuple t) { AER::Config config; - if (t.size() != 82) + if (t.size() != 84) throw std::runtime_error("Invalid serialization format."); 
read_value(t, 0, config.shots); @@ -594,6 +603,7 @@ void bind_aer_controller(MODULE m) { read_value(t, 80, config.shot_branching_enable); read_value(t, 81, config.shot_branching_sampling_enable); read_value(t, 82, config.target_gpus); + read_value(t, 83, config.runtime_parameter_bind_enable); return config; })); } diff --git a/releasenotes/notes/add_executor-ba4870f86ed5d8ec.yaml b/releasenotes/notes/add_executor-ba4870f86ed5d8ec.yaml new file mode 100644 index 0000000000..e1088061fa --- /dev/null +++ b/releasenotes/notes/add_executor-ba4870f86ed5d8ec.yaml @@ -0,0 +1,30 @@ +--- +features: + - | + This release restructures ``State`` classes. + It adds circuit executor classes that run a circuit and manage multiple + states for multi-shots simulations or multi-chunk simulations for a large + number of qubits. + Previously ``StateChunk`` class manages multiple chunks for multi-shots or + multi-chunk simulations but now ``State`` class only has one state + and all the parallelization code is moved to ``Executor`` classes. + Now all ``State`` classes are independent from parallelization. + Also some of the functions in ``Aer::Controller`` class are moved to + ``CircuitExecutor::Executor`` class. + - | + Shot-branching technique that accelerates dynamic circuits simulations + is implemented with restructured ``Executor`` classes. + Shot-branching is currently applicable to statevector, density_matrix + and tensor_network methods. + Shot-branching provides dynamic distribution of multi-shots + by branching states when applying dynamic operations + (measure, reset, initialize, noises). + By default ``shot_branching_enable`` is disabled. + And by setting ``shot_branching_sampling_enable``, final measures will be + done by sampling measure that will speed up to get counts for multiple shots + sharing the same state. + - | + New option for GPU simulation ``target_gpus`` is added. + A list of GPUs used for the simulation can be set by this option.
+ Without this option, all the available GPUs are used. + For example, if there are 4 GPUs, ``target_gpus=[0, 2]`` will use 2 GPUs. diff --git a/releasenotes/notes/runtime_parameter_binding-d2c57255f02729a1.yaml b/releasenotes/notes/runtime_parameter_binding-d2c57255f02729a1.yaml new file mode 100644 index 0000000000..04573f93f7 --- /dev/null +++ b/releasenotes/notes/runtime_parameter_binding-d2c57255f02729a1.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + A runtime parameter binding option is implemented to bind parameters at + runtime to a single circuit instead of running multiple circuits as input. + An option ``runtime_parameter_bind_enable=True`` enables this feature and + for GPU, ``batched_shots_gpu=True`` should also be set to speed up + simulating parameterized circuits. diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index e6005b9a62..d216b4ff9e 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -194,6 +194,9 @@ class Controller { int myrank_ = 0; int num_processes_ = 1; int num_process_per_experiment_ = 1; + + // runtime parameter binding + bool runtime_parameter_bind_ = false; }; //========================================================================= @@ -329,6 +332,10 @@ void Controller::set_config(const Config &config) { throw std::runtime_error(std::string("Invalid simulation precision (") + precision + std::string(").")); } + + // check if runtime binding is enable + if (config.runtime_parameter_bind_enable.has_value()) + runtime_parameter_bind_ = config.runtime_parameter_bind_enable.value(); } void Controller::clear_config() { @@ -502,7 +509,14 @@ Result Controller::execute(std::vector> &circuits, auto methods = simulation_methods(config, circuits, noise_model); // Initialize Result object for the given number of experiments - Result result(circuits.size()); + uint_t result_size; + reg_t result_offset(circuits.size()); + result_size = 0; + for (int_t i = 0; i <
circuits.size(); i++) { + result_offset[i] = result_size; + result_size += circuits[i]->num_bind_params; + } + Result result(result_size); // Initialize circuit executors for each circuit std::vector> executors( circuits.size()); @@ -514,12 +528,15 @@ Result Controller::execute(std::vector> &circuits, // set parallelization for experiments try { + uint_t res_pos = 0; for (int i = 0; i < circuits.size(); i++) { executors[i] = make_circuit_executor(methods[i]); required_memory_mb_list[i] = executors[i]->required_memory_mb(config, *circuits[i], noise_model); - result.results[i].metadata.add(required_memory_mb_list[i], - "required_memory_mb"); + for (int j = 0; j < circuits[i]->num_bind_params; j++) { + result.results[res_pos++].metadata.add(required_memory_mb_list[i], + "required_memory_mb"); + } } set_parallelization_experiments(required_memory_mb_list); } catch (std::exception &e) { @@ -565,33 +582,40 @@ Result Controller::execute(std::vector> &circuits, // average random seed to set the same seed to each process (when // seed_simulator is not set) if (num_processes_ > 1) { - reg_t seeds(circuits.size()); - reg_t avg_seeds(circuits.size()); - for (int_t i = 0; i < circuits.size(); i++) - seeds[i] = circuits[i]->seed; - MPI_Allreduce(seeds.data(), avg_seeds.data(), circuits.size(), - MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - for (int_t i = 0; i < circuits.size(); i++) - circuits[i]->seed = avg_seeds[i] / num_processes_; - } -#endif - - const int NUM_RESULTS = result.results.size(); - // following looks very similar but we have to separate them to avoid omp - // nested loops that causes performance degradation (DO NOT use if statement - // in #pragma omp) - if (parallel_experiments_ == 1) { - for (int i = 0; i < NUM_RESULTS; i++) { - executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], - sim_device_, result.results[i]); + reg_t seeds(result_size); + reg_t avg_seeds(result_size); + int_t iseed = 0; + for (int_t i = 0; i < circuits.size(); i++) { + 
if (circuits[i]->num_bind_params > 1) { + for (int_t j = 0; i < circuits[i]->num_bind_params; i++) + seeds[iseed++] = circuits[i]->seed_for_params[j]; + } else + seeds[iseed++] = circuits[i]->seed; } - } else { -#pragma omp parallel for num_threads(parallel_experiments_) - for (int i = 0; i < NUM_RESULTS; i++) { - executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], - sim_device_, result.results[i]); + MPI_Allreduce(seeds.data(), avg_seeds.data(), result_size, MPI_UINT64_T, + MPI_SUM, MPI_COMM_WORLD); + iseed = 0; + for (int_t i = 0; i < circuits.size(); i++) { + if (circuits[i]->num_bind_params > 1) { + for (int_t j = 0; i < circuits[i]->num_bind_params; i++) + circuits[i]->seed_for_params[j] = + avg_seeds[iseed++] / num_processes_; + } else + circuits[i]->seed = avg_seeds[iseed++] / num_processes_; } } +#endif + + auto run_circuits = [this, &executors, &circuits, &noise_model, &config, + &methods, &result, &result_offset](int_t i) { + executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], + sim_device_, + result.results.begin() + result_offset[i]); + }; + Utils::apply_omp_parallel_for((parallel_experiments_ > 1), 0, + circuits.size(), run_circuits, + parallel_experiments_); + executors.clear(); // Check each experiment result for completed status. @@ -599,7 +623,7 @@ Result Controller::execute(std::vector> &circuits, bool all_failed = true; result.status = Result::Status::completed; - for (int i = 0; i < NUM_RESULTS; ++i) { + for (int i = 0; i < result.results.size(); ++i) { auto &experiment = result.results[i]; if (experiment.status == ExperimentResult::Status::completed) { all_failed = false; diff --git a/src/controllers/controller_execute.hpp b/src/controllers/controller_execute.hpp index 4c2015461f..f3128a7739 100644 --- a/src/controllers/controller_execute.hpp +++ b/src/controllers/controller_execute.hpp @@ -67,7 +67,12 @@ Result controller_execute(std::vector> &input_circs, // pars = [par0, par1, ...] 
is a list of different parameterizations using pos_t = std::pair; using exp_params_t = std::vector>>; - std::vector param_table = config.param_table; + std::vector &param_table = config.param_table; + + // check if runtime binding is enable + bool runtime_parameter_bind = false; + if (config.runtime_parameter_bind_enable.has_value()) + runtime_parameter_bind = config.runtime_parameter_bind_enable.value(); // Validate parameterizations for number of circuis if (!param_table.empty() && param_table.size() != num_circs) { @@ -78,6 +83,8 @@ Result controller_execute(std::vector> &input_circs, std::vector> circs; std::vector> template_circs; + using myclock_t = std::chrono::high_resolution_clock; + auto timer_start = myclock_t::now(); try { // Load circuits for (size_t i = 0; i < num_circs; i++) { @@ -93,39 +100,41 @@ Result controller_execute(std::vector> &input_circs, circ->set_params(false); circ->set_metadata(config, truncate); // Load different parameterizations of the initial circuit - const auto circ_params = param_table[i]; + const auto &circ_params = param_table[i]; const size_t num_params = circ_params[0].second.size(); const size_t num_instr = circ->ops.size(); - for (size_t j = 0; j < num_params; j++) { + + if (runtime_parameter_bind && num_params > 1) { // Make a copy of the initial circuit auto param_circ = std::make_shared(*circ); + param_circ->num_bind_params = num_params; + for (const auto &params : circ_params) { const auto instr_pos = params.first.first; const auto param_pos = params.first.second; // Validation if (instr_pos == AER::Config::GLOBAL_PHASE_POS) { // negative position is for global phase - param_circ->global_phase_angle = params.second[j]; - } else { - if (instr_pos >= num_instr) { - std::cout << "Invalid parameterization: instruction position " - "out of range: " - << instr_pos << std::endl; - throw std::invalid_argument( - R"(Invalid parameterization: instruction position out of range)"); - } - auto &op = param_circ->ops[instr_pos]; +
param_circ->global_phase_for_params.resize(num_params); + for (size_t j = 0; j < num_params; j++) + param_circ->global_phase_for_params[j] = params.second[j]; + } else if (instr_pos >= num_instr) { + throw std::invalid_argument( + R"(Invalid parameterized qobj: instruction position out of range)"); + } + auto &op = param_circ->ops[instr_pos]; + if (!op.has_bind_params) { if (param_pos >= op.params.size()) { throw std::invalid_argument( - R"(Invalid parameterization: instruction param position out of range)"); - } - if (j >= params.second.size()) { - throw std::invalid_argument( - R"(Invalid parameterization: parameterization value out of range)"); + R"(Invalid parameterized qobj: instruction param position out of range)"); } - // Update the param - op.params[param_pos] = params.second[j]; + // resize parameter array + op.params.resize(op.params.size() * num_params); + op.has_bind_params = true; } + uint_t stride = op.params.size() / num_params; + for (size_t j = 0; j < num_params; j++) + op.params[param_pos + stride * j] = params.second[j]; } // Run truncation. 
// TODO: Truncation should be performed and parameters should be @@ -137,7 +146,53 @@ Result controller_execute(std::vector> &input_circs, param_circ->set_metadata(config, true); } circs.push_back(param_circ); - template_circs.push_back(circ); + for (size_t j = 0; j < num_params; j++) + template_circs.push_back(circ); + } else { + for (size_t j = 0; j < num_params; j++) { + // Make a copy of the initial circuit + auto param_circ = std::make_shared(*circ); + for (const auto &params : circ_params) { + const auto instr_pos = params.first.first; + const auto param_pos = params.first.second; + // Validation + if (instr_pos == AER::Config::GLOBAL_PHASE_POS) { + // negative position is for global phase + circ->global_phase_angle = params.second[j]; + } else { + if (instr_pos >= num_instr) { + std::cout << "Invalid parameterization: instruction position " + "out of range: " + << instr_pos << std::endl; + throw std::invalid_argument( + R"(Invalid parameterization: instruction position out of range)"); + } + auto &op = param_circ->ops[instr_pos]; + if (param_pos >= op.params.size()) { + throw std::invalid_argument( + R"(Invalid parameterization: instruction param position out of range)"); + } + if (j >= params.second.size()) { + throw std::invalid_argument( + R"(Invalid parameterization: parameterization value out of range)"); + } + // Update the param + op.params[param_pos] = params.second[j]; + } + } + // Run truncation. + // TODO: Truncation should be performed and parameters should be + // resolved after it. However, parameters are associated with + // indices of instructions, which can be changed in truncation. + // Therefore, current implementation performs truncation for each + // parameter set.
+ if (truncate) { + param_circ->set_params(true); + param_circ->set_metadata(config, true); + } + circs.push_back(param_circ); + template_circs.push_back(circ); + } } } } @@ -148,7 +203,6 @@ Result controller_execute(std::vector> &input_circs, result.message = std::string("Failed to load circuits: ") + e.what(); return result; } - int_t seed = -1; uint_t seed_shift = 0; @@ -157,10 +211,23 @@ Result controller_execute(std::vector> &input_circs, else seed = circs[0]->seed; - for (auto &circ : circs) { - circ->seed = seed + seed_shift; - seed_shift += 2113; + if (runtime_parameter_bind) { + for (auto &circ : circs) { + circ->seed = seed + seed_shift; + circ->seed_for_params.resize(circ->num_bind_params); + for (int_t i = 0; i < circ->num_bind_params; i++) { + circ->seed_for_params[i] = seed + seed_shift; + seed_shift += 2113; + } + } + } else { + for (auto &circ : circs) { + circ->seed = seed + seed_shift; + seed_shift += 2113; + } } + auto time_taken = + std::chrono::duration(myclock_t::now() - timer_start).count(); // Fix for MacOS and OpenMP library double initialization crash. 
// Issue: https://github.com/Qiskit/qiskit-aer/issues/1 @@ -170,6 +237,7 @@ Result controller_execute(std::vector> &input_circs, for (size_t i = 0; i < ret.results.size(); ++i) ret.results[i].circ_id = template_circs[i]->circ_id; + ret.metadata.add(time_taken, "time_taken_parameter_binding"); return ret; } diff --git a/src/framework/circuit.hpp b/src/framework/circuit.hpp index bc7645d694..a21a7fbd8c 100644 --- a/src/framework/circuit.hpp +++ b/src/framework/circuit.hpp @@ -63,6 +63,11 @@ class Circuit { double global_phase_angle = 0; bool remapped_qubits = false; // True if qubits have been remapped + // for runtime parameter bind, number of parameters per circuit + uint_t num_bind_params = 1; + reg_t seed_for_params; // random seed for each parameter + rvector_t global_phase_for_params; // global phase angles for each param + // Constructor // The constructor automatically calculates the num_qubits, num_memory, // num_registers parameters by scanning the input list of ops. diff --git a/src/framework/config.hpp b/src/framework/config.hpp index 60a5d7c313..1074f7acdf 100644 --- a/src/framework/config.hpp +++ b/src/framework/config.hpp @@ -171,6 +171,7 @@ struct Config { optional memory_blocking_bits; optional extended_stabilizer_norm_estimation_default_samples; optional target_gpus; + optional runtime_parameter_bind_enable; void clear() { shots = 1024; @@ -270,7 +271,9 @@ struct Config { unitary_parallel_threshold.clear(); memory_blocking_bits.clear(); extended_stabilizer_norm_estimation_default_samples.clear(); + target_gpus.clear(); + runtime_parameter_bind_enable.clear(); } void merge(const Config &other) { @@ -412,8 +415,12 @@ struct Config { if (other.extended_stabilizer_norm_estimation_default_samples.has_value()) extended_stabilizer_norm_estimation_default_samples.value( other.extended_stabilizer_norm_estimation_default_samples.value()); + if (other.target_gpus.has_value()) target_gpus.value(other.target_gpus.value()); + if 
(other.runtime_parameter_bind_enable.has_value()) + runtime_parameter_bind_enable.value( + other.runtime_parameter_bind_enable.value()); } }; @@ -529,6 +536,8 @@ inline void from_json(const json_t &js, Config &config) { get_value(config.extended_stabilizer_norm_estimation_default_samples, "extended_stabilizer_norm_estimation_default_samples", js); get_value(config.target_gpus, "target_gpus", js); + get_value(config.runtime_parameter_bind_enable, + "runtime_parameter_bind_enable", js); } } // namespace AER diff --git a/src/framework/operations.hpp b/src/framework/operations.hpp index 4ec55757ff..335528de59 100755 --- a/src/framework/operations.hpp +++ b/src/framework/operations.hpp @@ -308,6 +308,9 @@ struct Op { // Save DataSubType save_type = DataSubType::single; + + // runtime parameter bind + bool has_bind_params = false; }; inline std::ostream &operator<<(std::ostream &s, const Op &op) { @@ -940,6 +943,30 @@ inline Op make_qerror_loc(const reg_t &qubits, const std::string &label, return op; } +// make new op by parameter binding +inline Op bind_parameter(const Op &src, const uint_t iparam, + const uint_t num_params) { + Op op; + op.type = src.type; + op.name = src.name; + op.qubits = src.qubits; + op.conditional = src.conditional; + op.conditional_reg = src.conditional_reg; + + if (src.params.size() > 0) { + uint_t stride = src.params.size() / num_params; + op.params.resize(stride); + for (int_t i = 0; i < stride; i++) + op.params[i] = src.params[iparam * stride + i]; + } else if (src.mats.size() > 0) { + uint_t stride = src.mats.size() / num_params; + op.mats.resize(stride); + for (int_t i = 0; i < stride; i++) + op.mats[i] = src.mats[iparam * stride + i]; + } + return op; +} + //------------------------------------------------------------------------------ // JSON conversion //------------------------------------------------------------------------------ diff --git a/src/framework/results/data/metadata.hpp b/src/framework/results/data/metadata.hpp index 
cf7cb39bb1..789906b903 100644 --- a/src/framework/results/data/metadata.hpp +++ b/src/framework/results/data/metadata.hpp @@ -64,6 +64,8 @@ struct Metadata : public DataMap, // Combine stored data Metadata &combine(Metadata &&other); + + Metadata &copy(Metadata &other); }; //------------------------------------------------------------------------------ @@ -77,6 +79,13 @@ Metadata &Metadata::combine(Metadata &&other) { return *this; } +Metadata &Metadata::copy(Metadata &other) { + DataMap::copy(other); + DataMap::copy(other); + DataMap::copy(other); + return *this; +} + json_t Metadata::to_json() { json_t result = json_t::object(); DataMap::add_to_json(result); diff --git a/src/framework/results/data/subtypes/data_map.hpp b/src/framework/results/data/subtypes/data_map.hpp index 8c942ae0ac..2d46bd19f9 100644 --- a/src/framework/results/data/subtypes/data_map.hpp +++ b/src/framework/results/data/subtypes/data_map.hpp @@ -43,6 +43,9 @@ class DataMap { // Combine with another data object void combine(DataMap &&other); + // copy from another data object + void copy(DataMap &other); + // Clear all stored data void clear(); @@ -75,6 +78,9 @@ class DataMap { // Combine with another data object void combine(DataMap &&other); + // copy from another data object + void copy(DataMap &other); + // Clear all stored data void clear(); @@ -128,6 +134,22 @@ void DataMap::combine(DataMap &&other) { } } +template