Skip to content
This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

Permalink
fix ebits overflow
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhewang1-intc committed Dec 14, 2023
1 parent 97d9c76 commit 7290d05
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ inline int jblas_dtype_get_f8_quant_mbits(const JBLAS_DTYPE t) {

inline float get_mxfp_maxnorm(const JBLAS_DTYPE t, int ebits, int mantissa_bits) {
auto emax = std::pow(2, ebits - 1);
if (t == JBLAS_DTYPE::F8_E5M2) emax -= 1;
auto max_norm = std::pow(2, emax);
if (t != JBLAS_DTYPE::F8_E4M3) {
max_norm *= ((std::pow(2, mantissa_bits - 1) - 1) / std::pow(2, mantissa_bits - 2));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,8 @@ int8_t f8_mx_quantize(float v, float shared_exp) {
*shift_v <<= 1;
uint8_t store_ebit = (*(p + 3) & 0xFF);
store_ebit = store_ebit - 127 + std::pow(2, ebits - 1) - 1;
if (store_ebit > 15) store_ebit = 0;
if (store_ebit > 15 && F8_T == JBLAS_DTYPE::F8_E4M3) store_ebit = 0;
if (store_ebit > 31 && F8_T == JBLAS_DTYPE::F8_E5M2) store_ebit = 0;
store_ebit <<= store_mantissa;
*shift_v <<= 8;
int8_t ox80_shift = -128 >> (store_mantissa - 1);
Expand All @@ -903,6 +904,7 @@ inline JBLAS_CODE quantize_f32_f8_rowblock_mxscale(const float* srcptr, int8_t*
shared_exp = std::floor(std::log2(shared_exp));
auto ebits = utils::jblas_dtype_get_f8_ebits(F8_T);
auto emax = std::pow(2, ebits - 1);
if (F8_T == JBLAS_DTYPE::F8_E5M2) emax -= 1;
shared_exp -= emax;
auto scale_max = std::pow(2, 7) - 1; // e8m0 scale type.
shared_exp = shared_exp < (-1 * scale_max) ? (-1 * scale_max) : shared_exp;
Expand Down

0 comments on commit 7290d05

Please sign in to comment.