diff --git a/benchmarks/dot/dot.f90 b/benchmarks/dot/dot.f90 index d95aea2..4342ca6 100644 --- a/benchmarks/dot/dot.f90 +++ b/benchmarks/dot/dot.f90 @@ -1,7 +1,7 @@ program benchmark_dot use kinds - use fordot + use fordot, only: fdot_product => dot_product ! to avoid overloading use fast_math, only: fprod, fprod_kahan use forbenchmark @@ -19,7 +19,7 @@ program benchmark_dot allocate(seed_array(seed_size)) seed_array = 123456789 - call bench%init(7,'Benchmark dot_product','benchmarks/dot/results/dot', 10000) + call bench%init(8,'Benchmark dot_product','benchmarks/dot/results/dot', 10000) num_elements = [1000_ik, 10000_ik, 100000_ik, 1000000_ik] @@ -48,9 +48,9 @@ program benchmark_dot !=============================================================================== - call bench%start_benchmark(2,'m1', "a = dot_product(u,v,'m1')",[p]) + call bench%start_benchmark(2,'blas', "a = dot_product(u,v,'m2')",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m1') + a = fdot_product(u,v,'m2') call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) @@ -58,9 +58,9 @@ program benchmark_dot !=============================================================================== - call bench%start_benchmark(3,'m2', "a = dot_product(u,v,'m2')",[p]) + call bench%start_benchmark(3,'m1_b16', "a = f(u,v,'m1',16)",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m2') + a = fdot_product(u,v,'m1',16) call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) @@ -68,9 +68,9 @@ program benchmark_dot !=============================================================================== - call bench%start_benchmark(4,'m3', "a = dot_product(u,v,'m3')",[p]) + call bench%start_benchmark(4,'blas_b16', "a = dot_product(u,v,'m2',16)",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m3') + a = fdot_product(u,v,'m2',16) call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) @@ -78,9 +78,9 @@ program benchmark_dot !=============================================================================== - call bench%start_benchmark(5,'m4', "a = dot_product(u,v,'m4')",[p]) + call bench%start_benchmark(5,'m3_b16', "a = dot_product(u,v,'m3',16)",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m4') + a = fdot_product(u,v,'m3',16) call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) @@ -88,7 +88,17 @@ program benchmark_dot !=============================================================================== - call bench%start_benchmark(6,'chunks', "a = fprod(u,v)",[p]) + call bench%start_benchmark(6,'m4_b16', "a = dot_product(u,v,'m4',16)",[p]) + do nl = 1,bench%nloops + a = fdot_product(u,v,'m4',16) + call prevent_optimization(a,nl) ! loop-invariant + end do + call bench%stop_benchmark(cmp_gflops) + !=============================================================================== + + + !=============================================================================== + call bench%start_benchmark(7,'chunks', "a = fprod(u,v)",[p]) do nl = 1,bench%nloops a = fprod(u,v) call prevent_optimization(a,nl) ! loop-invariant @@ -98,7 +108,7 @@ program benchmark_dot !=============================================================================== - call bench%start_benchmark(7,'kahan', "a = fprod_kahan(u,v)",[p]) + call bench%start_benchmark(8,'kahan', "a = fprod_kahan(u,v)",[p]) do nl = 1,bench%nloops a = fprod_kahan(u,v) call prevent_optimization(a,nl) ! loop-invariant @@ -128,7 +138,7 @@ end function cmp_gflops subroutine prevent_optimization(a, nl) real(rk), intent(in) :: a integer, intent(in) :: nl - if (a == 0.0_rk) print*, nl, 'a = 0.0' + if (abs(a) dot_product ! to avoid overloading use forbenchmark implicit none @@ -21,7 +21,7 @@ program benchmark_dot_coarray allocate(seed_array(seed_size)) seed_array = 123456789 - call bench%init(4, 'Fordot_product','benchmarks/dot/results/dot', 1000) + call bench%init(6, 'Fordot_product','benchmarks/dot/results/dot', 1000) num_elements = [1000_ik, 10000_ik, 100000_ik, 1000000_ik] @@ -52,7 +52,7 @@ program benchmark_dot_coarray !=============================================================================== call bench%start_benchmark(2,'m1_co', "a = dot_product(u,v,'m1',coarray=.true.)",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m1',coarray=.true.) + a = fdot_product(u,v,'m1',coarray=.true.) call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) @@ -62,7 +62,7 @@ program benchmark_dot_coarray !=============================================================================== call bench%start_benchmark(3,'m2_co', "a = dot_product(u,v,'m2',coarray=.true.)",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m2',coarray=.true.) + a = fdot_product(u,v,'m2',coarray=.true.) call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) @@ -72,7 +72,7 @@ program benchmark_dot_coarray !=============================================================================== call bench%start_benchmark(4,'m3_co', "a = dot_product(u,v,'m3',coarray=.true.)",[p]) do nl = 1,bench%nloops - a = dot_product(u,v,'m3',coarray=.true.) + a = fdot_product(u,v,'m3',coarray=.true.) call prevent_optimization(a,nl) ! loop-invariant end do call bench%stop_benchmark(cmp_gflops) diff --git a/benchmarks/dot/results/dot_gfortran.html b/benchmarks/dot/results/dot_gfortran.html index c0154b6..e9f3fdb 100644 --- a/benchmarks/dot/results/dot_gfortran.html +++ b/benchmarks/dot/results/dot_gfortran.html @@ -40,48 +40,58 @@

dot_product Benchmark

dot_product 1.000000 - 2.400000e-06 - 0.416667 + 1.600000e-06 + 0.625000 10000 1000 1000 1000 - m1 - 1.043478 - 2.300000e-06 - 0.434783 + blas + 5.333333 + 3.000000e-07 + 3.333333 10000 1000 1000 1000 - m2 - 12.000000 - 2.000000e-07 - 5.000000 + m1_b16 + 0.571429 + 2.800000e-06 + 0.357143 10000 1000 1000 1000 - m3 - 1.142857 - 2.100000e-06 - 0.476190 + blas_b16 + 0.842105 + 1.900000e-06 + 0.526316 10000 1000 1000 1000 - m4 - 1.043478 - 2.300000e-06 - 0.434783 + m3_b16 + 0.571429 + 2.800000e-06 + 0.357143 + 10000 + 1000 + 1000 + 1000 + + + m4_b16 + 0.592593 + 2.700000e-06 + 0.370370 10000 1000 1000 @@ -90,8 +100,8 @@

dot_product Benchmark

chunks 8.000000 - 3.000000e-07 - 3.333333 + 2.000000e-07 + 5.000000 10000 1000 1000 @@ -99,7 +109,7 @@

dot_product Benchmark

kahan - 6.000000 + 4.000000 4.000000e-07 2.500000 10000 @@ -110,48 +120,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.750000e-05 - 0.571429 + 1.670000e-05 + 0.598802 10000 10000 10000 10000 - m1 - 1.023392 - 1.710000e-05 - 0.584795 + blas + 7.260870 + 2.300000e-06 + 4.347826 10000 10000 10000 10000 - m2 - 5.645161 - 3.100000e-06 - 3.225806 + m1_b16 + 0.970930 + 1.720000e-05 + 0.581395 + 10000 + 10000 + 10000 + 10000 + + + blas_b16 + 5.218750 + 3.200000e-06 + 3.125000 10000 10000 10000 10000 - m3 - 1.035503 - 1.690000e-05 - 0.591716 + m3_b16 + 0.976608 + 1.710000e-05 + 0.584795 10000 10000 10000 10000 - m4 - 1.041667 - 1.680000e-05 - 0.595238 + m4_b16 + 0.959770 + 1.740000e-05 + 0.574713 10000 10000 10000 @@ -159,9 +179,9 @@

dot_product Benchmark

chunks - 5.468750 - 3.200000e-06 - 3.125000 + 6.680000 + 2.500000e-06 + 4.000000 10000 10000 10000 @@ -169,9 +189,9 @@

dot_product Benchmark

kahan - 4.605263 - 3.800000e-06 - 2.631579 + 5.387097 + 3.100000e-06 + 3.225806 10000 10000 10000 @@ -180,48 +200,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.680000e-04 - 0.595238 + 1.674000e-04 + 0.597372 10000 100000 100000 100000 - m1 - 0.998217 - 1.683000e-04 - 0.594177 + blas + 9.732558 + 1.720000e-05 + 5.813953 10000 100000 100000 100000 - m2 - 12.824427 - 1.310000e-05 - 7.633588 + m1_b16 + 0.989362 + 1.692000e-04 + 0.591017 10000 100000 100000 100000 - m3 - 0.993495 - 1.691000e-04 - 0.591366 + blas_b16 + 3.647059 + 4.590000e-05 + 2.178649 10000 100000 100000 100000 - m4 - 0.987654 - 1.701000e-04 - 0.587889 + m3_b16 + 0.998807 + 1.676000e-04 + 0.596659 + 10000 + 100000 + 100000 + 100000 + + + m4_b16 + 0.997616 + 1.678000e-04 + 0.595948 10000 100000 100000 @@ -229,9 +259,9 @@

dot_product Benchmark

chunks - 3.193916 - 5.260000e-05 - 1.901141 + 3.607759 + 4.640000e-05 + 2.155172 10000 100000 100000 @@ -239,9 +269,9 @@

dot_product Benchmark

kahan - 2.957746 - 5.680000e-05 - 1.760563 + 3.348000 + 5.000000e-05 + 2.000000 10000 100000 100000 @@ -250,48 +280,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.716600e-03 - 0.582547 + 1.723200e-03 + 0.580316 + 10000 + 1000000 + 1000000 + 1000000 + + + blas + 8.082552 + 2.132000e-04 + 4.690432 10000 1000000 1000000 1000000 - m1 - 1.000117 - 1.716400e-03 - 0.582615 + m1_b16 + 0.998783 + 1.725300e-03 + 0.579609 10000 1000000 1000000 1000000 - m2 - 10.038596 - 1.710000e-04 - 5.847953 + blas_b16 + 6.216450 + 2.772000e-04 + 3.607504 10000 1000000 1000000 1000000 - m3 - 0.999011 - 1.718300e-03 - 0.581971 + m3_b16 + 0.998378 + 1.726000e-03 + 0.579374 10000 1000000 1000000 1000000 - m4 - 1.003566 - 1.710500e-03 - 0.584624 + m4_b16 + 0.999246 + 1.724500e-03 + 0.579878 10000 1000000 1000000 @@ -299,9 +339,9 @@

dot_product Benchmark

chunks - 2.615572 - 6.563000e-04 - 1.523693 + 2.691238 + 6.403000e-04 + 1.561768 10000 1000000 1000000 @@ -309,9 +349,9 @@

dot_product Benchmark

kahan - 2.418087 - 7.099000e-04 - 1.408649 + 2.487657 + 6.927000e-04 + 1.443626 10000 1000000 1000000 diff --git a/benchmarks/dot/results/dot_gfortran_perf.png b/benchmarks/dot/results/dot_gfortran_perf.png index d3832c8..bf334be 100644 Binary files a/benchmarks/dot/results/dot_gfortran_perf.png and b/benchmarks/dot/results/dot_gfortran_perf.png differ diff --git a/benchmarks/dot/results/dot_gfortran_speedup.png b/benchmarks/dot/results/dot_gfortran_speedup.png index edb05a0..a6be2b9 100644 Binary files a/benchmarks/dot/results/dot_gfortran_speedup.png and b/benchmarks/dot/results/dot_gfortran_speedup.png differ diff --git a/benchmarks/dot/results/dot_gfortran_speedup_avg.png b/benchmarks/dot/results/dot_gfortran_speedup_avg.png index 546d07d..6df0599 100644 Binary files a/benchmarks/dot/results/dot_gfortran_speedup_avg.png and b/benchmarks/dot/results/dot_gfortran_speedup_avg.png differ diff --git a/benchmarks/dot/results/dot_gfortran_time.png b/benchmarks/dot/results/dot_gfortran_time.png index 7915ecb..f35431d 100644 Binary files a/benchmarks/dot/results/dot_gfortran_time.png and b/benchmarks/dot/results/dot_gfortran_time.png differ diff --git a/benchmarks/dot/results/dot_ifort.html b/benchmarks/dot/results/dot_ifort.html index 2be7264..58c5f66 100644 --- a/benchmarks/dot/results/dot_ifort.html +++ b/benchmarks/dot/results/dot_ifort.html @@ -48,40 +48,50 @@

dot_product Benchmark

1000 - m1 - 0.614545 - 2.750000e-06 - 0.363636 + blas + 6.259259 + 2.700000e-07 + 3.703704 10000 1000 1000 1000 - m2 - 6.500000 - 2.600000e-07 - 3.846154 + m1_b16 + 0.710084 + 2.380000e-06 + 0.420168 10000 1000 1000 1000 - m3 - 0.612319 - 2.760000e-06 - 0.362319 + blas_b16 + 0.880208 + 1.920000e-06 + 0.520833 10000 1000 1000 1000 - m4 - 0.866667 - 1.950000e-06 - 0.512821 + m3_b16 + 0.590909 + 2.860000e-06 + 0.349650 + 10000 + 1000 + 1000 + 1000 + + + m4_b16 + 0.637736 + 2.650000e-06 + 0.377358 10000 1000 1000 @@ -89,9 +99,9 @@

dot_product Benchmark

chunks - 9.388889 - 1.800000e-07 - 5.555556 + 9.941176 + 1.700000e-07 + 5.882353 10000 1000 1000 @@ -99,9 +109,9 @@

dot_product Benchmark

kahan - 3.673913 - 4.600000e-07 - 2.173913 + 3.595745 + 4.700000e-07 + 2.127660 10000 1000 1000 @@ -110,48 +120,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.677000e-05 - 0.596303 + 1.674000e-05 + 0.597372 + 10000 + 10000 + 10000 + 10000 + + + blas + 5.978571 + 2.800000e-06 + 3.571429 10000 10000 10000 10000 - m1 - 0.997620 - 1.681000e-05 - 0.594884 + m1_b16 + 0.996429 + 1.680000e-05 + 0.595238 10000 10000 10000 10000 - m2 - 6.142857 - 2.730000e-06 - 3.663004 + blas_b16 + 4.755682 + 3.520000e-06 + 2.840909 10000 10000 10000 10000 - m3 - 0.995252 - 1.685000e-05 - 0.593472 + m3_b16 + 0.959862 + 1.744000e-05 + 0.573394 10000 10000 10000 10000 - m4 - 0.994072 - 1.687000e-05 - 0.592768 + m4_b16 + 0.958214 + 1.747000e-05 + 0.572410 10000 10000 10000 @@ -159,9 +179,9 @@

dot_product Benchmark

chunks - 6.576471 - 2.550000e-06 - 3.921569 + 6.389313 + 2.620000e-06 + 3.816794 10000 10000 10000 @@ -169,9 +189,9 @@

dot_product Benchmark

kahan - 3.701987 - 4.530000e-06 - 2.207506 + 3.623377 + 4.620000e-06 + 2.164502 10000 10000 10000 @@ -180,48 +200,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.673300e-04 - 0.597621 + 1.674000e-04 + 0.597372 10000 100000 100000 100000 - m1 - 0.999164 - 1.674700e-04 - 0.597122 + blas + 12.381657 + 1.352000e-05 + 7.396450 10000 100000 100000 100000 - m2 - 14.639545 - 1.143000e-05 - 8.748906 + m1_b16 + 0.993295 + 1.685300e-04 + 0.593366 10000 100000 100000 100000 - m3 - 0.998032 - 1.676600e-04 - 0.596445 + blas_b16 + 3.674276 + 4.556000e-05 + 2.194908 10000 100000 100000 100000 - m4 - 0.998985 - 1.675000e-04 - 0.597015 + m3_b16 + 0.991002 + 1.689200e-04 + 0.591996 + 10000 + 100000 + 100000 + 100000 + + + m4_b16 + 0.992353 + 1.686900e-04 + 0.592803 10000 100000 100000 @@ -229,9 +259,9 @@

dot_product Benchmark

chunks - 3.329288 - 5.026000e-05 - 1.989654 + 3.688037 + 4.539000e-05 + 2.203128 10000 100000 100000 @@ -239,9 +269,9 @@

dot_product Benchmark

kahan - 2.520789 - 6.638000e-05 - 1.506478 + 2.799331 + 5.980000e-05 + 1.672241 10000 100000 100000 @@ -250,48 +280,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.730490e-03 - 0.577871 + 1.747730e-03 + 0.572171 + 10000 + 1000000 + 1000000 + 1000000 + + + blas + 8.625228 + 2.026300e-04 + 4.935103 10000 1000000 1000000 1000000 - m1 - 1.000844 - 1.729030e-03 - 0.578359 + m1_b16 + 0.987262 + 1.770280e-03 + 0.564882 10000 1000000 1000000 1000000 - m2 - 9.674027 - 1.788800e-04 - 5.590340 + blas_b16 + 5.937390 + 2.943600e-04 + 3.397201 10000 1000000 1000000 1000000 - m3 - 0.999059 - 1.732120e-03 - 0.577327 + m3_b16 + 0.981904 + 1.779940e-03 + 0.561817 10000 1000000 1000000 1000000 - m4 - 0.996160 - 1.737160e-03 - 0.575652 + m4_b16 + 0.986593 + 1.771480e-03 + 0.564500 10000 1000000 1000000 @@ -299,9 +339,9 @@

dot_product Benchmark

chunks - 2.802685 - 6.174400e-04 - 1.619591 + 2.796815 + 6.249000e-04 + 1.600256 10000 1000000 1000000 @@ -309,9 +349,9 @@

dot_product Benchmark

kahan - 2.338216 - 7.400900e-04 - 1.351187 + 2.260561 + 7.731400e-04 + 1.293427 10000 1000000 1000000 diff --git a/benchmarks/dot/results/dot_ifort_co.html b/benchmarks/dot/results/dot_ifort_co.html index 5335b37..b3d6fe2 100644 --- a/benchmarks/dot/results/dot_ifort_co.html +++ b/benchmarks/dot/results/dot_ifort_co.html @@ -42,10 +42,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.000003 0.000002 0.000002 - 8.906157 + 0.000002 + 9.349845 1000 1000 1000 @@ -53,11 +53,11 @@

dot_product Benchmark

m1_co - 0.000324 - 0.008962 - 0.008962 - 0.008962 - 0.001785 + 0.000329 + 0.005775 + 0.005775 + 0.005775 + 0.002770 1000 1000 1000 @@ -65,11 +65,11 @@

dot_product Benchmark

m2_co - 0.000346 - 0.008380 - 0.008380 - 0.008380 - 0.001909 + 0.000273 + 0.006964 + 0.006964 + 0.006964 + 0.002297 1000 1000 1000 @@ -77,11 +77,11 @@

dot_product Benchmark

m3_co - 0.000327 - 0.008864 - 0.008864 - 0.008864 - 0.001805 + 0.000223 + 0.008538 + 0.008538 + 0.008538 + 0.001874 1000 1000 1000 @@ -90,10 +90,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.000030 + 0.000035 0.000017 - 0.000018 - 8.917131 + 0.000020 + 8.565535 1000 10000 10000 @@ -101,11 +101,11 @@

dot_product Benchmark

m1_co - 0.003719 - 0.008121 - 0.008120 - 0.008121 - 0.019703 + 0.005151 + 0.006833 + 0.006833 + 0.006833 + 0.023416 1000 10000 10000 @@ -113,11 +113,11 @@

dot_product Benchmark

m2_co - 0.003765 - 0.008021 - 0.008021 - 0.008021 - 0.019947 + 0.005102 + 0.006899 + 0.006895 + 0.006897 + 0.023197 1000 10000 10000 @@ -125,11 +125,11 @@

dot_product Benchmark

m3_co - 0.003371 - 0.008959 - 0.008959 - 0.008959 - 0.017860 + 0.005644 + 0.006236 + 0.006236 + 0.006236 + 0.025656 1000 10000 10000 @@ -138,10 +138,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.000467 - 0.000225 - 0.000359 - 4.655775 + 0.000447 + 0.000222 + 0.000335 + 4.950822 1000 100000 100000 @@ -149,11 +149,11 @@

dot_product Benchmark

m1_co - 0.056323 - 0.008292 - 0.008291 - 0.008291 - 0.192969 + 0.071489 + 0.006248 + 0.006248 + 0.006248 + 0.256062 1000 100000 100000 @@ -161,11 +161,11 @@

dot_product Benchmark

m2_co - 0.050779 - 0.009197 - 0.009197 - 0.009197 - 0.173975 + 0.044525 + 0.010033 + 0.010032 + 0.010033 + 0.159481 1000 100000 100000 @@ -173,11 +173,11 @@

dot_product Benchmark

m3_co - 0.055709 - 0.008383 - 0.008383 - 0.008383 - 0.190867 + 0.066330 + 0.006735 + 0.006734 + 0.006734 + 0.237583 1000 100000 100000 @@ -186,10 +186,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.009344 - 0.009065 - 0.009204 - 1.738573 + 0.008509 + 0.008337 + 0.008413 + 1.901813 1000 1000000 1000000 @@ -197,11 +197,11 @@

dot_product Benchmark

m1_co - 0.678207 - 0.013777 - 0.013777 - 0.013777 - 1.161314 + 0.827831 + 0.010278 + 0.010278 + 0.010278 + 1.556690 1000 1000000 1000000 @@ -209,11 +209,11 @@

dot_product Benchmark

m2_co - 0.668718 - 0.013973 - 0.013973 - 0.013973 - 1.145068 + 0.543472 + 0.015656 + 0.015656 + 0.015656 + 1.021959 1000 1000000 1000000 @@ -221,11 +221,11 @@

dot_product Benchmark

m3_co - 0.706626 - 0.013223 - 0.013223 - 0.013223 - 1.209977 + 0.607708 + 0.014001 + 0.014001 + 0.014001 + 1.142752 1000 1000000 1000000 diff --git a/benchmarks/dot/results/dot_ifort_co_perf_tot.png b/benchmarks/dot/results/dot_ifort_co_perf_tot.png index 10aec84..55329fe 100644 Binary files a/benchmarks/dot/results/dot_ifort_co_perf_tot.png and b/benchmarks/dot/results/dot_ifort_co_perf_tot.png differ diff --git a/benchmarks/dot/results/dot_ifort_co_speedup.png b/benchmarks/dot/results/dot_ifort_co_speedup.png index 0b721ee..edea9b8 100644 Binary files a/benchmarks/dot/results/dot_ifort_co_speedup.png and b/benchmarks/dot/results/dot_ifort_co_speedup.png differ diff --git a/benchmarks/dot/results/dot_ifort_co_speedup_max_avg.png b/benchmarks/dot/results/dot_ifort_co_speedup_max_avg.png index 5094cc0..f2457d2 100644 Binary files a/benchmarks/dot/results/dot_ifort_co_speedup_max_avg.png and b/benchmarks/dot/results/dot_ifort_co_speedup_max_avg.png differ diff --git a/benchmarks/dot/results/dot_ifort_co_time_avg.png b/benchmarks/dot/results/dot_ifort_co_time_avg.png index b15b6d3..355915a 100644 Binary files a/benchmarks/dot/results/dot_ifort_co_time_avg.png and b/benchmarks/dot/results/dot_ifort_co_time_avg.png differ diff --git a/benchmarks/dot/results/dot_ifort_co_time_max.png b/benchmarks/dot/results/dot_ifort_co_time_max.png index 19ff1bf..f4830f8 100644 Binary files a/benchmarks/dot/results/dot_ifort_co_time_max.png and b/benchmarks/dot/results/dot_ifort_co_time_max.png differ diff --git a/benchmarks/dot/results/dot_ifort_perf.png b/benchmarks/dot/results/dot_ifort_perf.png index 4e512ce..eb0cf7e 100644 Binary files a/benchmarks/dot/results/dot_ifort_perf.png and b/benchmarks/dot/results/dot_ifort_perf.png differ diff --git a/benchmarks/dot/results/dot_ifort_speedup.png b/benchmarks/dot/results/dot_ifort_speedup.png index 37a7400..9075eaa 100644 Binary files a/benchmarks/dot/results/dot_ifort_speedup.png and b/benchmarks/dot/results/dot_ifort_speedup.png differ diff --git a/benchmarks/dot/results/dot_ifort_speedup_avg.png b/benchmarks/dot/results/dot_ifort_speedup_avg.png index 2041e4a..be1d17d 100644 Binary files a/benchmarks/dot/results/dot_ifort_speedup_avg.png and b/benchmarks/dot/results/dot_ifort_speedup_avg.png differ diff --git a/benchmarks/dot/results/dot_ifort_time.png b/benchmarks/dot/results/dot_ifort_time.png index 9f2442c..63b3cc9 100644 Binary files a/benchmarks/dot/results/dot_ifort_time.png and b/benchmarks/dot/results/dot_ifort_time.png differ diff --git a/benchmarks/dot/results/dot_ifx.html b/benchmarks/dot/results/dot_ifx.html index 674fd41..5e0d5f9 100644 --- a/benchmarks/dot/results/dot_ifx.html +++ b/benchmarks/dot/results/dot_ifx.html @@ -40,48 +40,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.680000e-06 - 0.595238 + 1.690000e-06 + 0.591716 10000 1000 1000 1000 - m1 - 0.641221 - 2.620000e-06 - 0.381679 + blas + 6.500000 + 2.600000e-07 + 3.846154 + 10000 + 1000 + 1000 + 1000 + + + m1_b16 + 0.051368 + 3.290000e-05 + 0.030395 10000 1000 1000 1000 - m2 - 6.222222 - 2.700000e-07 - 3.703704 + blas_b16 + 0.200000 + 8.450000e-06 + 0.118343 10000 1000 1000 1000 - m3 - 0.501493 - 3.350000e-06 - 0.298507 + m3_b16 + 0.207872 + 8.130000e-06 + 0.123001 10000 1000 1000 1000 - m4 - 0.807692 - 2.080000e-06 - 0.480769 + m4_b16 + 0.113499 + 1.489000e-05 + 0.067159 10000 1000 1000 @@ -89,9 +99,9 @@

dot_product Benchmark

chunks - 1.826087 - 9.200000e-07 - 1.086957 + 1.362903 + 1.240000e-06 + 0.806452 10000 1000 1000 @@ -99,9 +109,9 @@

dot_product Benchmark

kahan - 1.166667 - 1.440000e-06 - 0.694444 + 0.734783 + 2.300000e-06 + 0.434783 10000 1000 1000 @@ -110,48 +120,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.729000e-05 - 0.578369 + 1.717000e-05 + 0.582411 10000 10000 10000 10000 - m1 - 1.029780 - 1.679000e-05 - 0.595593 + blas + 6.553435 + 2.620000e-06 + 3.816794 10000 10000 10000 10000 - m2 - 5.523962 - 3.130000e-06 - 3.194888 + m1_b16 + 1.558076 + 1.102000e-05 + 0.907441 10000 10000 10000 10000 - m3 - 1.024289 - 1.688000e-05 - 0.592417 + blas_b16 + 1.980392 + 8.670000e-06 + 1.153403 10000 10000 10000 10000 - m4 - 1.029167 - 1.680000e-05 - 0.595238 + m3_b16 + 1.385795 + 1.239000e-05 + 0.807103 + 10000 + 10000 + 10000 + 10000 + + + m4_b16 + 1.630579 + 1.053000e-05 + 0.949668 10000 10000 10000 @@ -159,9 +179,9 @@

dot_product Benchmark

chunks - 1.996536 - 8.660000e-06 - 1.154734 + 1.430833 + 1.200000e-05 + 0.833333 10000 10000 10000 @@ -169,9 +189,9 @@

dot_product Benchmark

kahan - 1.262044 - 1.370000e-05 - 0.729927 + 1.036836 + 1.656000e-05 + 0.603865 10000 10000 10000 @@ -180,48 +200,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.672400e-04 - 0.597943 + 1.673900e-04 + 0.597407 10000 100000 100000 100000 - m1 - 0.999522 - 1.673200e-04 - 0.597657 + blas + 15.121048 + 1.107000e-05 + 9.033424 10000 100000 100000 100000 - m2 - 14.892253 - 1.123000e-05 - 8.904720 + m1_b16 + 5.320725 + 3.146000e-05 + 3.178640 10000 100000 100000 100000 - m3 - 0.998031 - 1.675700e-04 - 0.596766 + blas_b16 + 13.016330 + 1.286000e-05 + 7.776050 10000 100000 100000 100000 - m4 - 0.999104 - 1.673900e-04 - 0.597407 + m3_b16 + 8.908462 + 1.879000e-05 + 5.321980 + 10000 + 100000 + 100000 + 100000 + + + m4_b16 + 8.814639 + 1.899000e-05 + 5.265929 10000 100000 100000 @@ -229,9 +259,9 @@

dot_product Benchmark

chunks - 1.830560 - 9.136000e-05 - 1.094571 + 1.756638 + 9.529000e-05 + 1.049428 10000 100000 100000 @@ -239,9 +269,9 @@

dot_product Benchmark

kahan - 1.217088 - 1.374100e-04 - 0.727749 + 1.213499 + 1.379400e-04 + 0.724953 10000 100000 100000 @@ -250,48 +280,58 @@

dot_product Benchmark

dot_product 1.000000 - 1.733590e-03 - 0.576838 + 1.729100e-03 + 0.578336 + 10000 + 1000000 + 1000000 + 1000000 + + + blas + 8.900957 + 1.942600e-04 + 5.147740 10000 1000000 1000000 1000000 - m1 - 1.001826 - 1.730430e-03 - 0.577891 + m1_b16 + 11.585260 + 1.492500e-04 + 6.700168 10000 1000000 1000000 1000000 - m2 - 9.999366 - 1.733700e-04 - 5.768011 + blas_b16 + 3.498998 + 4.941700e-04 + 2.023595 10000 1000000 1000000 1000000 - m3 - 1.000543 - 1.732650e-03 - 0.577151 + m3_b16 + 11.294663 + 1.530900e-04 + 6.532105 10000 1000000 1000000 1000000 - m4 - 0.999130 - 1.735100e-03 - 0.576336 + m4_b16 + 11.542724 + 1.498000e-04 + 6.675567 10000 1000000 1000000 @@ -299,9 +339,9 @@

dot_product Benchmark

chunks - 1.751349 - 9.898600e-04 - 1.010244 + 1.716876 + 1.007120e-03 + 0.992930 10000 1000000 1000000 @@ -309,9 +349,9 @@

dot_product Benchmark

kahan - 1.167409 - 1.484990e-03 - 0.673405 + 1.165672 + 1.483350e-03 + 0.674150 10000 1000000 1000000 diff --git a/benchmarks/dot/results/dot_ifx_co.html b/benchmarks/dot/results/dot_ifx_co.html index c069e00..c970e90 100644 --- a/benchmarks/dot/results/dot_ifx_co.html +++ b/benchmarks/dot/results/dot_ifx_co.html @@ -45,7 +45,7 @@

dot_product Benchmark

0.000002 0.000002 0.000002 - 9.180036 + 9.284486 1000 1000 1000 @@ -53,11 +53,11 @@

dot_product Benchmark

m1_co - 0.000241 - 0.009144 - 0.009144 - 0.009144 - 0.001750 + 0.000325 + 0.005838 + 0.005838 + 0.005838 + 0.002741 1000 1000 1000 @@ -65,11 +65,11 @@

dot_product Benchmark

m2_co - 0.000259 - 0.008494 - 0.008493 - 0.008494 - 0.001884 + 0.000297 + 0.006401 + 0.006401 + 0.006401 + 0.002500 1000 1000 1000 @@ -77,11 +77,11 @@

dot_product Benchmark

m3_co - 0.000233 - 0.009444 - 0.009444 - 0.009444 - 0.001694 + 0.000273 + 0.006963 + 0.006963 + 0.006963 + 0.002298 1000 1000 1000 @@ -90,10 +90,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.000029 + 0.000033 0.000017 0.000018 - 8.917157 + 8.891551 1000 10000 10000 @@ -101,11 +101,11 @@

dot_product Benchmark

m1_co - 0.003570 - 0.008207 - 0.008207 - 0.008207 - 0.019495 + 0.004790 + 0.006911 + 0.006911 + 0.006911 + 0.023153 1000 10000 10000 @@ -113,11 +113,11 @@

dot_product Benchmark

m2_co - 0.003295 - 0.008891 - 0.008891 - 0.008891 - 0.017996 + 0.003659 + 0.009046 + 0.009046 + 0.009046 + 0.017688 1000 10000 10000 @@ -125,11 +125,11 @@

dot_product Benchmark

m3_co - 0.003533 - 0.008292 - 0.008292 - 0.008292 - 0.019295 + 0.004636 + 0.007140 + 0.007140 + 0.007140 + 0.022410 1000 10000 10000 @@ -138,10 +138,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.000448 - 0.000226 - 0.000368 - 4.584413 + 0.000437 + 0.000250 + 0.000361 + 4.530980 1000 100000 100000 @@ -149,11 +149,11 @@

dot_product Benchmark

m1_co - 0.054181 - 0.008270 - 0.008270 - 0.008270 - 0.193461 + 0.070626 + 0.006183 + 0.006183 + 0.006183 + 0.258762 1000 100000 100000 @@ -161,11 +161,11 @@

dot_product Benchmark

m2_co - 0.047967 - 0.009342 - 0.009342 - 0.009342 - 0.171273 + 0.070399 + 0.006203 + 0.006203 + 0.006203 + 0.257932 1000 100000 100000 @@ -173,11 +173,11 @@

dot_product Benchmark

m3_co - 0.052354 - 0.008559 - 0.008559 - 0.008559 - 0.186936 + 0.062081 + 0.007034 + 0.007034 + 0.007034 + 0.227456 1000 100000 100000 @@ -186,10 +186,10 @@

dot_product Benchmark

dot_product 1.000000 - 0.009598 - 0.009041 - 0.009318 - 1.717448 + 0.008663 + 0.008174 + 0.008402 + 1.904804 1000 1000000 1000000 @@ -197,11 +197,11 @@

dot_product Benchmark

m1_co - 0.701147 - 0.013689 - 0.013689 - 0.013689 - 1.168822 + 0.711372 + 0.012178 + 0.012178 + 0.012178 + 1.313888 1000 1000000 1000000 @@ -209,11 +209,11 @@

dot_product Benchmark

m2_co - 0.651640 - 0.014729 - 0.014729 - 0.014729 - 1.086292 + 0.796191 + 0.010880 + 0.010876 + 0.010878 + 1.470868 1000 1000000 1000000 @@ -221,11 +221,11 @@

dot_product Benchmark

m3_co - 0.650201 - 0.014762 - 0.014761 - 0.014762 - 1.083896 + 0.577058 + 0.015012 + 0.015012 + 0.015012 + 1.065814 1000 1000000 1000000 diff --git a/benchmarks/dot/results/dot_ifx_co_perf_tot.png b/benchmarks/dot/results/dot_ifx_co_perf_tot.png index 5c2b995..dc5f724 100644 Binary files a/benchmarks/dot/results/dot_ifx_co_perf_tot.png and b/benchmarks/dot/results/dot_ifx_co_perf_tot.png differ diff --git a/benchmarks/dot/results/dot_ifx_co_speedup.png b/benchmarks/dot/results/dot_ifx_co_speedup.png index 07f23bf..a054ed6 100644 Binary files a/benchmarks/dot/results/dot_ifx_co_speedup.png and b/benchmarks/dot/results/dot_ifx_co_speedup.png differ diff --git a/benchmarks/dot/results/dot_ifx_co_speedup_max_avg.png b/benchmarks/dot/results/dot_ifx_co_speedup_max_avg.png index 7eb7a43..19fc7fc 100644 Binary files a/benchmarks/dot/results/dot_ifx_co_speedup_max_avg.png and b/benchmarks/dot/results/dot_ifx_co_speedup_max_avg.png differ diff --git a/benchmarks/dot/results/dot_ifx_co_time_avg.png b/benchmarks/dot/results/dot_ifx_co_time_avg.png index 798b906..815b7e4 100644 Binary files a/benchmarks/dot/results/dot_ifx_co_time_avg.png and b/benchmarks/dot/results/dot_ifx_co_time_avg.png differ diff --git a/benchmarks/dot/results/dot_ifx_co_time_max.png b/benchmarks/dot/results/dot_ifx_co_time_max.png index 48278f6..3a2bda8 100644 Binary files a/benchmarks/dot/results/dot_ifx_co_time_max.png and b/benchmarks/dot/results/dot_ifx_co_time_max.png differ diff --git a/benchmarks/dot/results/dot_ifx_perf.png b/benchmarks/dot/results/dot_ifx_perf.png index 52fadf6..392abfe 100644 Binary files a/benchmarks/dot/results/dot_ifx_perf.png and b/benchmarks/dot/results/dot_ifx_perf.png differ diff --git a/benchmarks/dot/results/dot_ifx_speedup.png b/benchmarks/dot/results/dot_ifx_speedup.png index 5c068e2..8c18cd8 100644 Binary files a/benchmarks/dot/results/dot_ifx_speedup.png and b/benchmarks/dot/results/dot_ifx_speedup.png differ diff --git a/benchmarks/dot/results/dot_ifx_speedup_avg.png b/benchmarks/dot/results/dot_ifx_speedup_avg.png index 2b8790e..5137fdd 100644 Binary files a/benchmarks/dot/results/dot_ifx_speedup_avg.png and b/benchmarks/dot/results/dot_ifx_speedup_avg.png differ diff --git a/benchmarks/dot/results/dot_ifx_time.png b/benchmarks/dot/results/dot_ifx_time.png index 9855f9e..db3982c 100644 Binary files a/benchmarks/dot/results/dot_ifx_time.png and b/benchmarks/dot/results/dot_ifx_time.png differ diff --git a/benchmarks/dot/results/dot_nvfortran.html b/benchmarks/dot/results/dot_nvfortran.html index b278183..ccd3690 100644 --- a/benchmarks/dot/results/dot_nvfortran.html +++ b/benchmarks/dot/results/dot_nvfortran.html @@ -40,48 +40,58 @@

dot_product Benchmark

dot_product 1.000000 - 2.136000e-07 - 4.681648 + 2.133000e-07 + 4.688233 10000 1000 1000 1000 - m1 - 0.962596 - 2.219000e-07 - 4.506534 + blas + 0.854225 + 2.497000e-07 + 4.004806 10000 1000 1000 1000 - m2 - 0.844603 - 2.529000e-07 - 3.954132 + m1_b16 + 0.161677 + 1.319300e-06 + 0.757978 10000 1000 1000 1000 - m3 - 0.900126 - 2.373000e-07 - 4.214075 + blas_b16 + 0.120454 + 1.770800e-06 + 0.564717 10000 1000 1000 1000 - m4 - 0.881552 - 2.423000e-07 - 4.127115 + m3_b16 + 0.131895 + 1.617200e-06 + 0.618353 + 10000 + 1000 + 1000 + 1000 + + + m4_b16 + 0.125618 + 1.698000e-06 + 0.588928 10000 1000 1000 @@ -89,9 +99,9 @@

dot_product Benchmark

chunks - 0.752643 - 2.838000e-07 - 3.523608 + 0.755847 + 2.822000e-07 + 3.543586 10000 1000 1000 @@ -99,9 +109,9 @@

dot_product Benchmark

kahan - 0.384796 - 5.551000e-07 - 1.801477 + 0.390159 + 5.467000e-07 + 1.829157 10000 1000 1000 @@ -110,48 +120,58 @@

dot_product Benchmark

dot_product 1.000000 - 2.449800e-06 - 4.081966 + 2.623500e-06 + 3.811702 + 10000 + 10000 + 10000 + 10000 + + + blas + 0.980198 + 2.676500e-06 + 3.736223 10000 10000 10000 10000 - m1 - 0.991380 - 2.471100e-06 - 4.046781 + m1_b16 + 0.632779 + 4.146000e-06 + 2.411963 10000 10000 10000 10000 - m2 - 0.980783 - 2.497800e-06 - 4.003523 + blas_b16 + 0.578220 + 4.537200e-06 + 2.204002 10000 10000 10000 10000 - m3 - 0.977613 - 2.505900e-06 - 3.990582 + m3_b16 + 0.601072 + 4.364700e-06 + 2.291108 10000 10000 10000 10000 - m4 - 0.973727 - 2.515900e-06 - 3.974721 + m4_b16 + 0.590958 + 4.439400e-06 + 2.252557 10000 10000 10000 @@ -159,9 +179,9 @@

dot_product Benchmark

chunks - 0.557457 - 4.394600e-06 - 2.275520 + 0.574774 + 4.564400e-06 + 2.190868 10000 10000 10000 @@ -169,9 +189,9 @@

dot_product Benchmark

kahan - 0.441971 - 5.542900e-06 - 1.804110 + 0.455952 + 5.753900e-06 + 1.737952 10000 10000 10000 @@ -180,48 +200,58 @@

dot_product Benchmark

dot_product 1.000000 - 4.245560e-05 - 2.355402 + 4.319610e-05 + 2.315024 10000 100000 100000 100000 - m1 - 1.006646 - 4.217530e-05 - 2.371056 + blas + 1.378156 + 3.134340e-05 + 3.190464 10000 100000 100000 100000 - m2 - 3.816268 - 1.112490e-05 - 8.988845 + m1_b16 + 0.982489 + 4.396600e-05 + 2.274485 10000 100000 100000 100000 - m3 - 1.005133 - 4.223880e-05 - 2.367492 + blas_b16 + 0.972519 + 4.441670e-05 + 2.251405 10000 100000 100000 100000 - m4 - 1.000754 - 4.242360e-05 - 2.357179 + m3_b16 + 0.978518 + 4.414440e-05 + 2.265293 + 10000 + 100000 + 100000 + 100000 + + + m4_b16 + 0.975412 + 4.428500e-05 + 2.258101 10000 100000 100000 @@ -229,9 +259,9 @@

dot_product Benchmark

chunks - 0.898362 - 4.725890e-05 - 2.116004 + 0.926702 + 4.661270e-05 + 2.145338 10000 100000 100000 @@ -239,9 +269,9 @@

dot_product Benchmark

kahan - 0.657020 - 6.461840e-05 - 1.547547 + 0.661962 + 6.525470e-05 + 1.532457 10000 100000 100000 @@ -250,48 +280,58 @@

dot_product Benchmark

dot_product 1.000000 - 5.856337e-04 - 1.707552 + 5.770094e-04 + 1.733074 + 10000 + 1000000 + 1000000 + 1000000 + + + blas + 1.568794 + 3.678045e-04 + 2.718836 10000 1000000 1000000 1000000 - m1 - 1.024743 - 5.714932e-04 - 1.749802 + m1_b16 + 0.962880 + 5.992540e-04 + 1.668741 10000 1000000 1000000 1000000 - m2 - 3.353743 - 1.746209e-04 - 5.726691 + blas_b16 + 1.843979 + 3.129154e-04 + 3.195752 10000 1000000 1000000 1000000 - m3 - 1.022575 - 5.727051e-04 - 1.746099 + m3_b16 + 0.950563 + 6.070184e-04 + 1.647397 10000 1000000 1000000 1000000 - m4 - 1.019593 - 5.743798e-04 - 1.741008 + m4_b16 + 0.920440 + 6.268845e-04 + 1.595190 10000 1000000 1000000 @@ -299,9 +339,9 @@

dot_product Benchmark

chunks - 0.903414 - 6.482454e-04 - 1.542626 + 0.830726 + 6.945842e-04 + 1.439710 10000 1000000 1000000 @@ -309,9 +349,9 @@

dot_product Benchmark

kahan - 0.762443 - 7.681020e-04 - 1.301910 + 0.663607 + 8.695051e-04 + 1.150080 10000 1000000 1000000 diff --git a/benchmarks/dot/results/dot_nvfortran_perf.png b/benchmarks/dot/results/dot_nvfortran_perf.png index 880945f..cccbf91 100644 Binary files a/benchmarks/dot/results/dot_nvfortran_perf.png and b/benchmarks/dot/results/dot_nvfortran_perf.png differ diff --git a/benchmarks/dot/results/dot_nvfortran_speedup.png b/benchmarks/dot/results/dot_nvfortran_speedup.png index 1f125f6..1579fa4 100644 Binary files a/benchmarks/dot/results/dot_nvfortran_speedup.png and b/benchmarks/dot/results/dot_nvfortran_speedup.png differ diff --git a/benchmarks/dot/results/dot_nvfortran_speedup_avg.png b/benchmarks/dot/results/dot_nvfortran_speedup_avg.png index fad0e90..a2d6403 100644 Binary files a/benchmarks/dot/results/dot_nvfortran_speedup_avg.png and b/benchmarks/dot/results/dot_nvfortran_speedup_avg.png differ diff --git a/benchmarks/dot/results/dot_nvfortran_time.png b/benchmarks/dot/results/dot_nvfortran_time.png index 055d4b7..61a38e1 100644 Binary files a/benchmarks/dot/results/dot_nvfortran_time.png and b/benchmarks/dot/results/dot_nvfortran_time.png differ diff --git a/benchmarks/dot/results/export.py b/benchmarks/dot/results/export.py index 0ec24e7..1c628d0 100644 --- a/benchmarks/dot/results/export.py +++ b/benchmarks/dot/results/export.py @@ -36,7 +36,7 @@ title='dot_product Benchmark - Speedup', xlabel='Number of Elements', ylabel='Speedup [-]', - bar_width=0.12) + bar_width=0.10) fpd.plot_speedup_avg(file_path, benchmark_data, x_data, diff --git a/benchmarks/dot/test/test_dot.f90 b/benchmarks/dot/test/test_dot.f90 index 20a412e..9b7a61e 100644 --- a/benchmarks/dot/test/test_dot.f90 +++ b/benchmarks/dot/test/test_dot.f90 @@ -1,7 +1,7 @@ program test_dot use kinds - use fordot + use fordot, only: fdot_product => dot_product use fast_math, only: fprod, fprod_kahan use forunittest @@ -12,22 +12,24 @@ program test_dot integer :: m type(unit_test) :: ut + m = 300 + allocate(u(m),v(m)) call random_number(u) call random_number(v) a_ref = dot_product(u,v) - a = dot_product(u,v, option='m1') + a = fdot_product(u,v, option='m1') call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m1") - a = dot_product(u,v, option='m2') + a = fdot_product(u,v, option='m2') call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m2") - a = dot_product(u,v, option='m3') + a = fdot_product(u,v, option='m3') call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m3") - a = dot_product(u,v, option='m4') + a = fdot_product(u,v, option='m4') call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m4") a = fprod(u,v) @@ -36,4 +38,16 @@ program test_dot a = fprod_kahan(u,v) call ut%check(a, a_ref, tol=1e-12_rk, msg="fprod_kahan") + a = fdot_product(u,v, option='m1',nblock=16) + call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m1b") + + a = fdot_product(u,v, option='m2',nblock=16) + call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m2b") + + a = fdot_product(u,v, option='m3',nblock=16) + call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m3b") + + a = fdot_product(u,v, option='m4',nblock=16) + call ut%check(a, a_ref, tol=1e-12_rk, msg="dot_product m4b") + end program test_dot \ No newline at end of file diff --git a/fpm.rsp b/fpm.rsp index cd566ff..2c57110 100644 --- a/fpm.rsp +++ b/fpm.rsp @@ -3,7 +3,7 @@ options run --example options --target dot options --profile release options --compiler ifx -options --flag "-O3 -mtune=native -xHost -lblas -qopenmp -flto=full -DINT64" +options --flag "-O3 -mtune=native -xHost -lblas -qopenmp -flto=full -DINT64 -DUSE_DO_CONCURRENT" @benchmark-dot-ifort options run --example @@ -31,7 +31,7 @@ options run --example options --target dot_co options --profile release options --compiler ifx -options --flag "-O3 -mtune=native -xHost -lblas -qopenmp -DINT64 -flto=full -coarray -DUSE_COARRAY" +options --flag "-O3 -mtune=native -xHost -lblas -qopenmp -DINT64 -flto=full -coarray -DUSE_COARRAY -DUSE_DO_CONCURRENT" @benchmark-dot-ifort-coarray options run --example @@ -48,7 +48,7 @@ options test options --target test_dot options --profile release options --compiler ifx -options --flag "-O3 -mtune=native -xHost -lblas -qopenmp -flto=full -DINT64" +options --flag "-O3 -mtune=native -xHost -lblas -qopenmp -flto=full -DINT64 -DUSE_DO_CONCURRENT" @benchmark-dot-test-ifort options test