diff --git a/common_param.h b/common_param.h index 1854570f14..8b39ca2fc4 100644 --- a/common_param.h +++ b/common_param.h @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2023 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -45,6 +46,7 @@ typedef struct { int dtb_entries; + int switch_ratio; int offsetA, offsetB, align; #if BUILD_BFLOAT16 == 1 diff --git a/driver/level3/level3_gemm3m_thread.c b/driver/level3/level3_gemm3m_thread.c index 39824fc5ab..26d07fa944 100644 --- a/driver/level3/level3_gemm3m_thread.c +++ b/driver/level3/level3_gemm3m_thread.c @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2023 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -44,10 +45,6 @@ #define DIVIDE_RATE 2 #endif -#ifndef SWITCH_RATIO -#define SWITCH_RATIO 2 -#endif - //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD @@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG divN, divT; int mode; +#if defined(DYNAMIC_ARCH) + int switch_ratio = gotoblas->switch_ratio; +#else + int switch_ratio = SWITCH_RATIO; +#endif + if (range_m) { BLASLONG m_from = *(((BLASLONG *)range_m) + 0); BLASLONG m_to = *(((BLASLONG *)range_m) + 1); @@ -1030,7 +1033,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } */ - if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { + if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) { GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0); return 0; } @@ -1038,7 +1041,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO divT = nthreads; divN = 1; - while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) { + while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) { do { divT --; divN = 1; diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c index d7dcd68a3b..b03577fb3d 100644 --- a/driver/level3/level3_syrk_threaded.c +++ b/driver/level3/level3_syrk_threaded.c @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2023 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -44,10 +45,6 @@ #define DIVIDE_RATE 2 #endif -#ifndef SWITCH_RATIO -#define SWITCH_RATIO 2 -#endif - //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD @@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO int mode, mask; double dnum, di, dinum; - if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) { +#if defined(DYNAMIC_ARCH) + int switch_ratio = gotoblas->switch_ratio; +#else + int switch_ratio = SWITCH_RATIO; +#endif + + if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) { SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); return 0; } diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 02b60b50d9..c9ecf73e8a 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2023 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -44,10 +45,6 @@ #define DIVIDE_RATE 2 #endif -#ifndef SWITCH_RATIO -#define SWITCH_RATIO 2 -#endif - #ifndef GEMM_PREFERED_SIZE #define GEMM_PREFERED_SIZE 1 #endif @@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); BLASLONG width, i, j, k, js; BLASLONG m, n, n_from, n_to; int mode; +#if defined(DYNAMIC_ARCH) + int switch_ratio = gotoblas->switch_ratio; +#else + int switch_ratio = SWITCH_RATIO; +#endif /* Get execution mode */ #ifndef COMPLEX @@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); num_parts = 0; while (n > 0){ width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); - if (width < SWITCH_RATIO) { - width = SWITCH_RATIO; + if (width < switch_ratio) { + width = switch_ratio; } width = round_up(n, width, GEMM_PREFERED_SIZE); @@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF BLASLONG m = args -> m; BLASLONG n = args -> n; BLASLONG nthreads_m, nthreads_n; +#if defined(DYNAMIC_ARCH) + int switch_ratio = gotoblas->switch_ratio; +#else + int switch_ratio = SWITCH_RATIO; +#endif /* Get dimensions from index ranges if available */ if (range_m) { @@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF n = range_n[1] - range_n[0]; } - /* Partitions in m should have at least SWITCH_RATIO rows */ - if (m < 2 * SWITCH_RATIO) { + /* Partitions in m should have at least switch_ratio rows */ + if (m < 2 * switch_ratio) { nthreads_m = 1; } else { nthreads_m = args -> nthreads; - while (m < nthreads_m * SWITCH_RATIO) { + while (m < nthreads_m * switch_ratio) { nthreads_m = nthreads_m / 2; } } - /* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */ - if (n < SWITCH_RATIO * nthreads_m) { + /* Partitions in n should have at most switch_ratio * nthreads_m columns */ + if (n < switch_ratio * nthreads_m) { nthreads_n = 1; } else { - nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m); + nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m); if (nthreads_m * nthreads_n > args -> nthreads) { nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); } diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 14a339e758..7832c0a878 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2023 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -49,7 +50,9 @@ static void init_parameter(void); gotoblas_t TABLE_NAME = { - DTB_DEFAULT_ENTRIES , + DTB_DEFAULT_ENTRIES, + + SWITCH_RATIO, GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, diff --git a/lapack/potrf/potrf_parallel.c b/lapack/potrf/potrf_parallel.c index 29364cc051..a7c28f4c22 100644 --- a/lapack/potrf/potrf_parallel.c +++ b/lapack/potrf/potrf_parallel.c @@ -80,10 +80,6 @@ static FLOAT dm1 = -1.; #define DIVIDE_RATE 2 #endif -#ifndef SWITCH_RATIO -#define SWITCH_RATIO 2 -#endif - #ifndef LOWER #define TRANS #endif diff --git a/param.h b/param.h index ae391dd3f6..191664a9fc 100644 --- a/param.h +++ b/param.h @@ -3854,6 +3854,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #endif +#ifndef SWITCH_RATIO +#define SWITCH_RATIO 2 +#endif + #ifndef QGEMM_DEFAULT_UNROLL_M #define QGEMM_DEFAULT_UNROLL_M 2 #endif