Skip to content

Commit

Permalink
Merge pull request #3843 from Mousius/switch-ratio
Browse files: browse the repository at this point in the history
Propagate SWITCH_RATIO to DYNAMIC_ARCH builds
  • Loading branch information
martin-frbg authored Apr 19, 2023
2 parents c628030 + 32f2faf commit 437c0bf
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 28 deletions.
2 changes: 2 additions & 0 deletions common_param.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
Expand Down Expand Up @@ -45,6 +46,7 @@

typedef struct {
int dtb_entries;
int switch_ratio;
int offsetA, offsetB, align;

#if BUILD_BFLOAT16 == 1
Expand Down
15 changes: 9 additions & 6 deletions driver/level3/level3_gemm3m_thread.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
Expand Down Expand Up @@ -44,10 +45,6 @@
#define DIVIDE_RATE 2
#endif

#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif

//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
Expand Down Expand Up @@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG divN, divT;
int mode;

#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif

if (range_m) {
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
Expand All @@ -1030,15 +1033,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
}
*/

if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) {
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);
return 0;
}

divT = nthreads;
divN = 1;

while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) {
while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) {
do {
divT --;
divN = 1;
Expand Down
13 changes: 8 additions & 5 deletions driver/level3/level3_syrk_threaded.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
Expand Down Expand Up @@ -44,10 +45,6 @@
#define DIVIDE_RATE 2
#endif

#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif

//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
Expand Down Expand Up @@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
int mode, mask;
double dnum, di, dinum;

if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif

if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) {
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
return 0;
}
Expand Down
31 changes: 19 additions & 12 deletions driver/level3/level3_thread.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
Expand Down Expand Up @@ -44,10 +45,6 @@
#define DIVIDE_RATE 2
#endif

#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif

#ifndef GEMM_PREFERED_SIZE
#define GEMM_PREFERED_SIZE 1
#endif
Expand Down Expand Up @@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
BLASLONG width, i, j, k, js;
BLASLONG m, n, n_from, n_to;
int mode;
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif

/* Get execution mode */
#ifndef COMPLEX
Expand Down Expand Up @@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
num_parts = 0;
while (n > 0){
width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts);
if (width < SWITCH_RATIO) {
width = SWITCH_RATIO;
if (width < switch_ratio) {
width = switch_ratio;
}
width = round_up(n, width, GEMM_PREFERED_SIZE);

Expand Down Expand Up @@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
BLASLONG m = args -> m;
BLASLONG n = args -> n;
BLASLONG nthreads_m, nthreads_n;
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif

/* Get dimensions from index ranges if available */
if (range_m) {
Expand All @@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
n = range_n[1] - range_n[0];
}

/* Partitions in m should have at least SWITCH_RATIO rows */
if (m < 2 * SWITCH_RATIO) {
/* Partitions in m should have at least switch_ratio rows */
if (m < 2 * switch_ratio) {
nthreads_m = 1;
} else {
nthreads_m = args -> nthreads;
while (m < nthreads_m * SWITCH_RATIO) {
while (m < nthreads_m * switch_ratio) {
nthreads_m = nthreads_m / 2;
}
}

/* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */
if (n < SWITCH_RATIO * nthreads_m) {
/* Partitions in n should have at most switch_ratio * nthreads_m columns */
if (n < switch_ratio * nthreads_m) {
nthreads_n = 1;
} else {
nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m);
nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m);
if (nthreads_m * nthreads_n > args -> nthreads) {
nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m);
}
Expand Down
5 changes: 4 additions & 1 deletion kernel/setparam-ref.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
Expand Down Expand Up @@ -49,7 +50,9 @@
static void init_parameter(void);

gotoblas_t TABLE_NAME = {
DTB_DEFAULT_ENTRIES ,
DTB_DEFAULT_ENTRIES,

SWITCH_RATIO,

GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,

Expand Down
4 changes: 0 additions & 4 deletions lapack/potrf/potrf_parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,6 @@ static FLOAT dm1 = -1.;
#define DIVIDE_RATE 2
#endif

#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif

#ifndef LOWER
#define TRANS
#endif
Expand Down
4 changes: 4 additions & 0 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -3854,6 +3854,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout

#endif

/* Fallback value for SWITCH_RATIO, used only when nothing earlier has
 * defined it.  The threaded level-3 drivers read it (directly, or via
 * gotoblas->switch_ratio in DYNAMIC_ARCH builds) when comparing problem
 * dimensions against the thread count to decide whether to split work. */
#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif

#ifndef QGEMM_DEFAULT_UNROLL_M
#define QGEMM_DEFAULT_UNROLL_M 2
#endif
Expand Down

0 comments on commit 437c0bf

Please sign in to comment.