forked from google/XNNPACK
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add qs8/qu8 vadd/vaddc RVV microkernel implementations and configs
- Loading branch information
1 parent
d779b27
commit a08c147
Showing
18 changed files
with
654 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Copyright 2024 Imagination Technologies, inc. | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
$assert DATATYPE in ["QS8", "QU8"] | ||
$assert LMUL in [1, 2, 4, 8] | ||
#include <assert.h> | ||
|
||
#include <riscv_vector.h> | ||
|
||
#include "xnnpack/vbinary.h" | ||
|
||
$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE] | ||
|
||
void xnn_${DATATYPE.lower()}_vadd_minmax_ukernel__rvv_u${LMUL}v( | ||
size_t batch, | ||
const ${XINT8_T}* input_a, | ||
const ${XINT8_T}* input_b, | ||
${XINT8_T}* output, | ||
const struct xnn_${DATATYPE.lower()}_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(batch % sizeof(${XINT8_T}) == 0); | ||
assert(input_a != NULL); | ||
assert(input_b != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t bias = params->scalar.bias; | ||
const int32_t a_multiplier = params->scalar.a_multiplier; | ||
const int32_t b_multiplier = params->scalar.b_multiplier; | ||
const uint32_t shift = params->scalar.shift; | ||
const int32_t output_min = params->scalar.output_min; | ||
const int32_t output_max = params->scalar.output_max; | ||
const int32_t output_zero_point = params->scalar.output_zero_point; | ||
|
||
do { | ||
int32_t n = __riscv_vsetvl_e8m${LMUL}(batch); batch -= n; | ||
|
||
$if DATATYPE == "QS8": | ||
vint8m${LMUL}_t in_a_i8v = __riscv_vle8_v_i8m${LMUL}(input_a, n); input_a += n; | ||
vint8m${LMUL}_t in_b_i8v = __riscv_vle8_v_i8m${LMUL}(input_b, n); input_b += n; | ||
vint16m${LMUL*2}_t a_i16v = __riscv_vwcvt_x_x_v_i16m${LMUL*2}(in_a_i8v, n); | ||
vint16m${LMUL*2}_t b_i16v = __riscv_vwcvt_x_x_v_i16m${LMUL*2}(in_b_i8v, n); | ||
$else: | ||
vuint8m${LMUL}_t in_a_u8v = __riscv_vle8_v_u8m${LMUL}(input_a, n); input_a += n; | ||
vuint8m${LMUL}_t in_b_u8v = __riscv_vle8_v_u8m${LMUL}(input_b, n); input_b += n; | ||
vuint16m${LMUL*2}_t a_u16v = __riscv_vwcvtu_x_x_v_u16m${LMUL*2}(in_a_u8v, n); | ||
vuint16m${LMUL*2}_t b_u16v = __riscv_vwcvtu_x_x_v_u16m${LMUL*2}(in_b_u8v, n); | ||
vint16m${LMUL*2}_t a_i16v = __riscv_vreinterpret_v_u16m${LMUL*2}_i16m${LMUL*2}(a_u16v); | ||
vint16m${LMUL*2}_t b_i16v = __riscv_vreinterpret_v_u16m${LMUL*2}_i16m${LMUL*2}(b_u16v); | ||
vint32m${LMUL*4}_t a_i32v = __riscv_vwcvt_x_x_v_i32m${LMUL*4}(a_i16v, n); | ||
vint32m${LMUL*4}_t b_i32v = __riscv_vwcvt_x_x_v_i32m${LMUL*4}(b_i16v, n); | ||
a_i32v = __riscv_vmul_vx_i32m${LMUL*4}(a_i32v, a_multiplier, n); | ||
b_i32v = __riscv_vmul_vx_i32m${LMUL*4}(b_i32v, b_multiplier, n); | ||
vint32m${LMUL*4}_t acc_i32v = __riscv_vadd_vx_i32m${LMUL*4}(a_i32v, bias, n); | ||
acc_i32v = __riscv_vadd_vv_i32m${LMUL*4}(acc_i32v, b_i32v, n); | ||
vint32m${LMUL*4}_t out_i32v = __riscv_vsra_vx_i32m${LMUL*4}(acc_i32v, shift, n); | ||
out_i32v = __riscv_vadd_vx_i32m${LMUL*4}(out_i32v, output_zero_point, n); | ||
out_i32v = __riscv_vmax_vx_i32m${LMUL*4}(out_i32v, output_min, n); | ||
out_i32v = __riscv_vmin_vx_i32m${LMUL*4}(out_i32v, output_max, n); | ||
vint16m${LMUL*2}_t out_i16v = __riscv_vncvt_x_x_w_i16m${LMUL*2}(out_i32v, n); | ||
$if DATATYPE == "QS8": | ||
vint8m${LMUL}_t out_i8v = __riscv_vncvt_x_x_w_i8m${LMUL}(out_i16v, n); | ||
__riscv_vse8_v_i8m${LMUL}(output, out_i8v, n); output += n; | ||
$else: | ||
a_u16v = __riscv_vreinterpret_v_i16m${LMUL*2}_u16m${LMUL*2}(out_i16v); | ||
vuint8m${LMUL}_t out_u8v = __riscv_vncvt_x_x_w_u8m${LMUL}(a_u16v, n); | ||
__riscv_vse8_v_u8m${LMUL}(output, out_u8v, n); output += n; | ||
} while (batch != 0); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Copyright 2024 Imagination Technologies, inc. | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
$assert DATATYPE in ["QS8", "QU8"] | ||
$assert LMUL in [1, 2, 4, 8] | ||
#include <assert.h> | ||
|
||
#include <riscv_vector.h> | ||
|
||
#include "xnnpack/vbinary.h" | ||
|
||
$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE] | ||
|
||
void xnn_${DATATYPE.lower()}_vaddc_minmax_ukernel__rvv_u${LMUL}v( | ||
size_t batch, | ||
const ${XINT8_T}* input_a, | ||
const ${XINT8_T}* input_b, | ||
${XINT8_T}* output, | ||
const struct xnn_${DATATYPE.lower()}_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(batch % sizeof(${XINT8_T}) == 0); | ||
assert(input_a != NULL); | ||
assert(input_b != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t bias = params->scalar.bias + (int32_t) *input_b * params->scalar.b_multiplier; | ||
const int32_t a_multiplier = params->scalar.a_multiplier; | ||
const uint32_t shift = params->scalar.shift; | ||
const int32_t output_min = params->scalar.output_min; | ||
const int32_t output_max = params->scalar.output_max; | ||
const int32_t output_zero_point = params->scalar.output_zero_point; | ||
|
||
do { | ||
int32_t n = __riscv_vsetvl_e8m${LMUL}(batch); batch -= n; | ||
|
||
$if DATATYPE == "QS8": | ||
vint8m${LMUL}_t in_a_i8v = __riscv_vle8_v_i8m${LMUL}(input_a, n); input_a += n; | ||
vint16m${LMUL*2}_t a_i16v = __riscv_vwcvt_x_x_v_i16m${LMUL*2}(in_a_i8v, n); | ||
$else: | ||
vuint8m${LMUL}_t in_a_u8v = __riscv_vle8_v_u8m${LMUL}(input_a, n); input_a += n; | ||
vuint16m${LMUL*2}_t a_u16v = __riscv_vwcvtu_x_x_v_u16m${LMUL*2}(in_a_u8v, n); | ||
vint16m${LMUL*2}_t a_i16v = __riscv_vreinterpret_v_u16m${LMUL*2}_i16m${LMUL*2}(a_u16v); | ||
vint32m${LMUL*4}_t a_i32v = __riscv_vwcvt_x_x_v_i32m${LMUL*4}(a_i16v, n); | ||
a_i32v = __riscv_vmul_vx_i32m${LMUL*4}(a_i32v, a_multiplier, n); | ||
vint32m${LMUL*4}_t acc_i32v = __riscv_vadd_vx_i32m${LMUL*4}(a_i32v, bias, n); | ||
vint32m${LMUL*4}_t out_i32v = __riscv_vsra_vx_i32m${LMUL*4}(acc_i32v, shift, n); | ||
out_i32v = __riscv_vadd_vx_i32m${LMUL*4}(out_i32v, output_zero_point, n); | ||
out_i32v = __riscv_vmax_vx_i32m${LMUL*4}(out_i32v, output_min, n); | ||
out_i32v = __riscv_vmin_vx_i32m${LMUL*4}(out_i32v, output_max, n); | ||
vint16m${LMUL*2}_t out_i16v = __riscv_vncvt_x_x_w_i16m${LMUL*2}(out_i32v, n); | ||
$if DATATYPE == "QS8": | ||
vint8m${LMUL}_t out_i8v = __riscv_vncvt_x_x_w_i8m${LMUL}(out_i16v, n); | ||
__riscv_vse8_v_i8m${LMUL}(output, out_i8v, n); output += n; | ||
$else: | ||
a_u16v = __riscv_vreinterpret_v_i16m${LMUL*2}_u16m${LMUL*2}(out_i16v); | ||
vuint8m${LMUL}_t out_u8v = __riscv_vncvt_x_x_w_u8m${LMUL}(a_u16v, n); | ||
__riscv_vse8_v_u8m${LMUL}(output, out_u8v, n); output += n; | ||
} while (batch != 0); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// Auto-generated file. Do not edit! | ||
// Template: src/qs8-vadd/rvv.c.in | ||
// Generator: tools/xngen | ||
// | ||
// Copyright 2024 Imagination Technologies, inc. | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#include <assert.h> | ||
|
||
#include <riscv_vector.h> | ||
|
||
#include "xnnpack/vbinary.h" | ||
|
||
|
||
void xnn_qs8_vadd_minmax_ukernel__rvv_u1v( | ||
size_t batch, | ||
const int8_t* input_a, | ||
const int8_t* input_b, | ||
int8_t* output, | ||
const struct xnn_qs8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(batch % sizeof(int8_t) == 0); | ||
assert(input_a != NULL); | ||
assert(input_b != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t bias = params->scalar.bias; | ||
const int32_t a_multiplier = params->scalar.a_multiplier; | ||
const int32_t b_multiplier = params->scalar.b_multiplier; | ||
const uint32_t shift = params->scalar.shift; | ||
const int32_t output_min = params->scalar.output_min; | ||
const int32_t output_max = params->scalar.output_max; | ||
const int32_t output_zero_point = params->scalar.output_zero_point; | ||
|
||
do { | ||
int32_t n = __riscv_vsetvl_e8m1(batch); batch -= n; | ||
|
||
vint8m1_t in_a_i8v = __riscv_vle8_v_i8m1(input_a, n); input_a += n; | ||
vint8m1_t in_b_i8v = __riscv_vle8_v_i8m1(input_b, n); input_b += n; | ||
vint16m2_t a_i16v = __riscv_vwcvt_x_x_v_i16m2(in_a_i8v, n); | ||
vint16m2_t b_i16v = __riscv_vwcvt_x_x_v_i16m2(in_b_i8v, n); | ||
vint32m4_t a_i32v = __riscv_vwcvt_x_x_v_i32m4(a_i16v, n); | ||
vint32m4_t b_i32v = __riscv_vwcvt_x_x_v_i32m4(b_i16v, n); | ||
a_i32v = __riscv_vmul_vx_i32m4(a_i32v, a_multiplier, n); | ||
b_i32v = __riscv_vmul_vx_i32m4(b_i32v, b_multiplier, n); | ||
vint32m4_t acc_i32v = __riscv_vadd_vx_i32m4(a_i32v, bias, n); | ||
acc_i32v = __riscv_vadd_vv_i32m4(acc_i32v, b_i32v, n); | ||
vint32m4_t out_i32v = __riscv_vsra_vx_i32m4(acc_i32v, shift, n); | ||
out_i32v = __riscv_vadd_vx_i32m4(out_i32v, output_zero_point, n); | ||
out_i32v = __riscv_vmax_vx_i32m4(out_i32v, output_min, n); | ||
out_i32v = __riscv_vmin_vx_i32m4(out_i32v, output_max, n); | ||
vint16m2_t out_i16v = __riscv_vncvt_x_x_w_i16m2(out_i32v, n); | ||
vint8m1_t out_i8v = __riscv_vncvt_x_x_w_i8m1(out_i16v, n); | ||
__riscv_vse8_v_i8m1(output, out_i8v, n); output += n; | ||
} while (batch != 0); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// Auto-generated file. Do not edit! | ||
// Template: src/qs8-vadd/rvv.c.in | ||
// Generator: tools/xngen | ||
// | ||
// Copyright 2024 Imagination Technologies, inc. | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#include <assert.h> | ||
|
||
#include <riscv_vector.h> | ||
|
||
#include "xnnpack/vbinary.h" | ||
|
||
|
||
void xnn_qs8_vadd_minmax_ukernel__rvv_u2v( | ||
size_t batch, | ||
const int8_t* input_a, | ||
const int8_t* input_b, | ||
int8_t* output, | ||
const struct xnn_qs8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(batch % sizeof(int8_t) == 0); | ||
assert(input_a != NULL); | ||
assert(input_b != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t bias = params->scalar.bias; | ||
const int32_t a_multiplier = params->scalar.a_multiplier; | ||
const int32_t b_multiplier = params->scalar.b_multiplier; | ||
const uint32_t shift = params->scalar.shift; | ||
const int32_t output_min = params->scalar.output_min; | ||
const int32_t output_max = params->scalar.output_max; | ||
const int32_t output_zero_point = params->scalar.output_zero_point; | ||
|
||
do { | ||
int32_t n = __riscv_vsetvl_e8m2(batch); batch -= n; | ||
|
||
vint8m2_t in_a_i8v = __riscv_vle8_v_i8m2(input_a, n); input_a += n; | ||
vint8m2_t in_b_i8v = __riscv_vle8_v_i8m2(input_b, n); input_b += n; | ||
vint16m4_t a_i16v = __riscv_vwcvt_x_x_v_i16m4(in_a_i8v, n); | ||
vint16m4_t b_i16v = __riscv_vwcvt_x_x_v_i16m4(in_b_i8v, n); | ||
vint32m8_t a_i32v = __riscv_vwcvt_x_x_v_i32m8(a_i16v, n); | ||
vint32m8_t b_i32v = __riscv_vwcvt_x_x_v_i32m8(b_i16v, n); | ||
a_i32v = __riscv_vmul_vx_i32m8(a_i32v, a_multiplier, n); | ||
b_i32v = __riscv_vmul_vx_i32m8(b_i32v, b_multiplier, n); | ||
vint32m8_t acc_i32v = __riscv_vadd_vx_i32m8(a_i32v, bias, n); | ||
acc_i32v = __riscv_vadd_vv_i32m8(acc_i32v, b_i32v, n); | ||
vint32m8_t out_i32v = __riscv_vsra_vx_i32m8(acc_i32v, shift, n); | ||
out_i32v = __riscv_vadd_vx_i32m8(out_i32v, output_zero_point, n); | ||
out_i32v = __riscv_vmax_vx_i32m8(out_i32v, output_min, n); | ||
out_i32v = __riscv_vmin_vx_i32m8(out_i32v, output_max, n); | ||
vint16m4_t out_i16v = __riscv_vncvt_x_x_w_i16m4(out_i32v, n); | ||
vint8m2_t out_i8v = __riscv_vncvt_x_x_w_i8m2(out_i16v, n); | ||
__riscv_vse8_v_i8m2(output, out_i8v, n); output += n; | ||
} while (batch != 0); | ||
} |
Oops, something went wrong.