diff --git a/CMakeLists.txt b/CMakeLists.txt index 44e3d11926..e5d7663e94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,8 @@ else() set(OQS_DEBUG_BUILD OFF) endif() +option(OQS_SPEED_USE_ARM_PMU "Use ARM Performance Monitor Unit during benchmarking" OFF) + if(WIN32) set(CMAKE_GENERATOR_CC cl) endif() diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake index e84e2a7f15..e78659fe23 100644 --- a/src/oqsconfig.h.cmake +++ b/src/oqsconfig.h.cmake @@ -39,6 +39,8 @@ #cmakedefine OQS_USE_ARM_SHA3_INSTRUCTIONS 1 #cmakedefine OQS_USE_ARM_NEON_INSTRUCTIONS 1 +#cmakedefine OQS_SPEED_USE_ARM_PMU 1 + #cmakedefine OQS_ENABLE_TEST_CONSTANT_TIME 1 #cmakedefine OQS_ENABLE_SHA3_xkcp_low_avx2 1 diff --git a/tests/ds_benchmark.h b/tests/ds_benchmark.h index 8a2d8a32c8..94ad1e0ef1 100644 --- a/tests/ds_benchmark.h +++ b/tests/ds_benchmark.h @@ -135,6 +135,12 @@ static uint64_t _bench_rdtsc(void) { __asm__ volatile("mrc p15, 0, %0, c9, c13, 0\t\n" : "=r"(value)); return value; +#elif defined(SPEED_USE_ARM_PMU) + /* Use the Performance Monitoring Unit (PMU); this branch is selected when SPEED_USE_ARM_PMU is defined */ + uint64_t value; + /* Read the PMCCNTR_EL0 system register into value with an MRS instruction */ + __asm__ volatile("mrs %0, PMCCNTR_EL0" : "=r" (value)); + return value; #elif defined(__s390x__) #define USING_TIME_RATHER_THAN_CYCLES uint64_t tod; @@ -174,6 +180,20 @@ static void _bench_init_perfcounters(int32_t do_reset, int32_t enable_divider) { /* Clear overflows */ __asm__ volatile("mcr p15, 0, %0, c9, c12, 3\t\n" ::"r"(0x8000000f)); } +#elif defined(SPEED_USE_ARM_PMU) + +/* Enabling access to ARMv8's Performance Monitoring Unit + * cannot be done from user mode. A kernel module that + * enables access must be loaded first, which generally + * requires superuser permissions. 
A module that has + * been found to work on some platforms is available at + * https://github.com/mupq/pqax#enable-access-to-performance-counters + */ + +static void _bench_init_perfcounters(void) { /* PMU variant: takes no arguments; writes PMCR_EL0, then PMCNTENSET_EL0 */ + __asm__ volatile("MSR PMCR_EL0, %0" ::"r"(1)); /* writes the value 1 to PMCR_EL0; NOTE(review): presumably the E (enable) bit - confirm against the Arm ARM */ + __asm__ volatile("MSR PMCNTENSET_EL0, %0" ::"r"(0x80000000)); /* writes 0x80000000 (bit 31 only) to PMCNTENSET_EL0; NOTE(review): presumably enables the cycle counter that _bench_rdtsc reads - confirm */ +} #endif #define DEFINE_TIMER_VARIABLES \ @@ -194,6 +214,15 @@ static void _bench_init_perfcounters(int32_t do_reset, int32_t enable_divider) { _bench_time_cumulative = 0; \ _bench_time_mean = 0.0; \ _bench_time_M2 = 0.0; +#elif defined(SPEED_USE_ARM_PMU) /* PMU variant: calls the zero-argument _bench_init_perfcounters() before clearing the statistics */ +#define INITIALIZE_TIMER \ + _bench_init_perfcounters(); \ + _bench_iterations = 0; \ + _bench_cycles_mean = 0.0; \ + _bench_cycles_M2 = 0.0; \ + _bench_time_cumulative = 0; \ + _bench_time_mean = 0.0; \ + _bench_time_M2 = 0.0; #else #define INITIALIZE_TIMER \ _bench_iterations = 0; \ diff --git a/tests/speed_kem.c b/tests/speed_kem.c index 38b0ee6e96..e4fc6c1cbe 100644 --- a/tests/speed_kem.c +++ b/tests/speed_kem.c @@ -11,6 +11,9 @@ #if defined(USE_RASPBERRY_PI) #define _RASPBERRY_PI #endif +#if defined(OQS_SPEED_USE_ARM_PMU) /* forward the OQS_-prefixed build option to the SPEED_USE_ARM_PMU name that ds_benchmark.h checks */ +#define SPEED_USE_ARM_PMU +#endif #include "ds_benchmark.h" #include "system_info.c" diff --git a/tests/speed_sig.c b/tests/speed_sig.c index d43415c3f8..976f25ce50 100644 --- a/tests/speed_sig.c +++ b/tests/speed_sig.c @@ -11,6 +11,9 @@ #if defined(USE_RASPBERRY_PI) #define _RASPBERRY_PI #endif +#if defined(OQS_SPEED_USE_ARM_PMU) /* forward the OQS_-prefixed build option to the SPEED_USE_ARM_PMU name that ds_benchmark.h checks */ +#define SPEED_USE_ARM_PMU +#endif #include "ds_benchmark.h" #include "system_info.c" diff --git a/tests/test_aes.c b/tests/test_aes.c index 35bd54bc0a..e987664a4f 100644 --- a/tests/test_aes.c +++ b/tests/test_aes.c @@ -10,6 +10,9 @@ #if defined(USE_RASPBERRY_PI) #define _RASPBERRY_PI #endif +#if defined(OQS_SPEED_USE_ARM_PMU) /* forward the OQS_-prefixed build option to the SPEED_USE_ARM_PMU name that ds_benchmark.h checks */ +#define SPEED_USE_ARM_PMU +#endif #include "ds_benchmark.h" #include "system_info.c"