Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/libxsmm/libxsmm into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
hfp committed Oct 15, 2024
2 parents 5e4ec07 + ea90b80 commit e9170d0
Show file tree
Hide file tree
Showing 27 changed files with 1,643 additions and 1,422 deletions.
1 change: 1 addition & 0 deletions samples/eltwise/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ endif
@-rm -f $(TSTDIR)/.generate_unary_gather_scatter_test_scripts
@-rm -f $(TSTDIR)/.generate_binary_test_scripts
@-rm -f $(TSTDIR)/.generate_ternary_test_scripts
@-rm -f $(TSTDIR)/generate*gen*.sh
@-rm -f $(TSTDIR)/unary_*.sh
@-rm -f $(TSTDIR)/binary_*.sh
@-rm -f $(TSTDIR)/ternary_*.sh
Expand Down
1 change: 1 addition & 0 deletions samples/eltwise/kernel_test/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
binary_*.sh
unary_*.sh
ternary_*.sh
generate*gen*.sh
120 changes: 10 additions & 110 deletions samples/eltwise/kernel_test/generate_binary_test_scripts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,117 +2,17 @@

HERE=$(cd "$(dirname "$0")" && pwd -P)

if [[ -z "${SSIZE}" ]]; then
SAMPLESIZE=10
else
SAMPLESIZE=${SSIZE}
fi

TMPFILE=$(mktemp)
trap 'rm ${TMPFILE}' EXIT
echo "#!/bin/bash" > generate_binary_test_scripts_gen_parallel.sh
echo "" >> generate_binary_test_scripts_gen_parallel.sh

for PREC in 'F32_F32_F32_F32' 'BF16_BF16_BF16_BF16' 'F32_F32_BF16_F32' 'F32_BF16_F32_F32' 'F32_BF16_BF16_F32' 'BF16_F32_F32_F32' 'BF16_F32_BF16_F32' 'BF16_BF16_F32_F32' 'BF16_BF16_BF16_F32' 'F16_F16_F16_F16' 'F32_F32_F16_F32' 'F32_F16_F32_F32' 'F32_F16_F16_F32' 'F16_F32_F32_F32' 'F16_F32_F16_F32' 'F16_F16_F32_F32' 'F16_F16_F16_F32' 'BF8_BF8_BF8_BF8' 'F32_F32_BF8_F32' 'F32_BF8_F32_F32' 'F32_BF8_BF8_F32' 'BF8_F32_F32_F32' 'BF8_F32_BF8_F32' 'BF8_BF8_F32_F32' 'BF8_BF8_BF8_F32' 'HF8_HF8_HF8_HF8' 'F32_F32_HF8_F32' 'F32_HF8_F32_F32' 'F32_HF8_HF8_F32' 'HF8_F32_F32_F32' 'HF8_F32_HF8_F32' 'HF8_HF8_F32_F32' 'HF8_HF8_HF8_F32' 'F64_F64_F64_F64' 'U16_U16_U32_IMPLICIT'; do
for TYPE in 1 2 3 4 5 6 9 10 27 28 29 30 31 32; do
for ROUND in 'rne' 'stoch'; do
for LD in 'eqld' 'gtld'; do
TPPNAME="none"
OUTNAME="${HERE}/binary_"
PRECLC=$(echo "$PREC" | awk '{print tolower($0)}')
RMODE=0
PRECSCRIPT=${PREC}

# only cpy TPP has low precision compute
if [[ (("$PREC" == 'F16_F16_F16_F16') || ("$PREC" == 'BF16_BF16_BF16_BF16') || ("$PREC" == 'BF8_BF8_BF8_BF8') || ("$PREC" == 'HF8_HF8_HF8_HF8')) ]]; then
continue
fi

# Binary zip tp blocks 2 x U16 -> U32 is only possible for 1 prec combination
if [[ ("$TYPE" == '6') && ("$PREC" != 'U16_U16_U32_IMPLICIT') ]]; then
continue
fi
if [[ ("$TYPE" != '6') && ("$PREC" == 'U16_U16_U32_IMPLICIT') ]]; then
continue
fi

# get TPP name
if [ "$TYPE" == '1' ] ; then
TPPNAME="add"
elif [ "$TYPE" == '2' ] ; then
TPPNAME="mul"
elif [ "$TYPE" == '3' ] ; then
TPPNAME="sub"
elif [ "$TYPE" == '4' ] ; then
TPPNAME="div"
elif [ "$TYPE" == '5' ] ; then
TPPNAME="muladd"
elif [ "$TYPE" == '6' ] ; then
TPPNAME="zip"
elif [ "$TYPE" == '9' ] ; then
TPPNAME="max"
elif [ "$TYPE" == '10' ] ; then
TPPNAME="min"
elif [ "$TYPE" == '27' ] ; then
TPPNAME="cmp_gt"
elif [ "$TYPE" == '28' ] ; then
TPPNAME="cmp_ge"
elif [ "$TYPE" == '29' ] ; then
TPPNAME="cmp_lt"
elif [ "$TYPE" == '30' ] ; then
TPPNAME="cmp_le"
elif [ "$TYPE" == '31' ] ; then
TPPNAME="cmp_eq"
elif [ "$TYPE" == '32' ] ; then
TPPNAME="cmp_ne"
else
continue
fi

if [[ ("$TYPE" == '27') || ("$TYPE" == '28') || ("$TYPE" == '29') || ("$TYPE" == '30') || ("$TYPE" == '31') || ("$TYPE" == '32') ]]; then
if [ "$ROUND" == 'stoch' ]; then
continue
fi
PREC_IN0=$(echo "$PRECLC" | awk -F"_" '{print $1}')
PREC_IN1=$(echo "$PRECLC" | awk -F"_" '{print $2}')
if [[ ("$PREC_IN0" == 'f64') || ("$PREC_IN1" == 'f64') ]]; then
continue
fi
PRECH_IN0=$(echo "$PREC" | awk -F"_" '{print $1}')
PRECH_IN1=$(echo "$PREC" | awk -F"_" '{print $2}')
PRECLC=${PREC_IN0}_${PREC_IN1}_implicit_f32
PRECSCRIPT=${PRECH_IN0}_${PRECH_IN1}_IMPLICIT_F32
else
if [ "$ROUND" == 'stoch' ]; then
PREC_OUT=$(echo "$PRECLC" | awk -F"_" '{print $3}')
if [ "$PREC_OUT" == 'bf8' ] ; then
PREC_IN0=$(echo "$PRECLC" | awk -F"_" '{print $1}')
PREC_IN1=$(echo "$PRECLC" | awk -F"_" '{print $2}')
PREC_COMP=$(echo "$PRECLC" | awk -F"_" '{print $4}')
PREC_OUT=${PREC_OUT}${ROUND}
RMODE=1
PRECLC=${PREC_IN0}_${PREC_IN1}_${PREC_OUT}_${PREC_COMP}
else
continue
fi
fi
fi

OUTNAME=${OUTNAME}${TPPNAME}_${PRECLC}_${LD}.sh

# generate script by sed
sed "s/PREC=0/PREC=\"${PRECSCRIPT}\"/g" ${HERE}/binary.tpl \
| sed "s/BINARY_OP=0/BINARY_OP=${TYPE}/g" \
| sed "s/RMODE=0/RMODE=${RMODE}/g" \
| sed "s/SAMPLESIZE/${SAMPLESIZE}/g" \
>${OUTNAME}
sed "s/PRECDESC/${PREC}/g" generate_binary_test_scripts.tpl > generate_binary_test_scripts_gen_$PREC.sh
chmod 755 generate_binary_test_scripts_gen_$PREC.sh
echo "./generate_binary_test_scripts_gen_$PREC.sh &" >> generate_binary_test_scripts_gen_parallel.sh
done

# for gt we need to touch up the script
if [ "$LD" == 'gtld' ] ; then
sed "s/+ str(m) + '_' + str(m)/+ '100_100'/g" ${OUTNAME} >${TMPFILE}
cp ${TMPFILE} ${OUTNAME}
fi
echo "wait" >> generate_binary_test_scripts_gen_parallel.sh
echo "sync" >> generate_binary_test_scripts_gen_parallel.sh
chmod 755 generate_binary_test_scripts_gen_parallel.sh

chmod 755 ${OUTNAME}
done
done
done
done
./generate_binary_test_scripts_gen_parallel.sh
124 changes: 124 additions & 0 deletions samples/eltwise/kernel_test/generate_binary_test_scripts.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env bash
#
# Per-precision generator for the binary eltwise kernel test scripts.
#
# This file is a template: generate_binary_test_scripts.sh runs sed over it,
# replacing the placeholder token in the PREC assignment below with one
# precision descriptor (IN0_IN1_OUT_COMP, e.g. F32_BF16_F32_F32), and then
# starts the resulting scripts in the background, one per precision.
# NOTE: that sed substitution is global, so do not spell the placeholder
# token anywhere else in this file (comments included).
#
# Output: ${HERE}/binary_<op>_<precision>_<ld>.sh, rendered from
#         ${HERE}/binary.tpl and made executable.
# Environment: SSIZE - optional sample size written into the generated
#              scripts (default: 10).

HERE=$(cd "$(dirname "$0")" && pwd -P)

# Sample size for the generated scripts; SSIZE overrides the default of 10.
SAMPLESIZE=${SSIZE:-10}

PREC=PRECDESC

# only cpy TPP has low precision compute, so these precisions yield no
# binary test scripts at all (checked once instead of in every iteration)
case "$PREC" in
  F16_F16_F16_F16 | BF16_BF16_BF16_BF16 | BF8_BF8_BF8_BF8 | HF8_HF8_HF8_HF8)
    exit 0
    ;;
esac

# Lower-case descriptor (used in file names) and the individual fields of
# both spellings, split once up front instead of via awk in the inner loop.
PRECLC_BASE=$(printf '%s\n' "$PREC" | awk '{print tolower($0)}')
IFS=_ read -r PRECH_IN0 PRECH_IN1 PRECH_OUT PRECH_COMP _ <<<"$PREC"
IFS=_ read -r PREC_IN0 PREC_IN1 PREC_OUT PREC_COMP _ <<<"$PRECLC_BASE"

for TYPE in 1 2 3 4 5 6 9 10 27 28 29 30 31 32; do
  # Binary zip (2 x U16 -> U32) is only possible for one precision
  # combination, and that combination supports nothing but zip.
  if [[ "$TYPE" == '6' && "$PREC" != 'U16_U16_U32_IMPLICIT' ]]; then
    continue
  fi
  if [[ "$TYPE" != '6' && "$PREC" == 'U16_U16_U32_IMPLICIT' ]]; then
    continue
  fi

  # get TPP name; the compare ops (27-32) get special precision handling
  IS_CMP=0
  case "$TYPE" in
    1) TPPNAME="add" ;;
    2) TPPNAME="mul" ;;
    3) TPPNAME="sub" ;;
    4) TPPNAME="div" ;;
    5) TPPNAME="muladd" ;;
    6) TPPNAME="zip" ;;
    9) TPPNAME="max" ;;
    10) TPPNAME="min" ;;
    27) TPPNAME="cmp_gt" IS_CMP=1 ;;
    28) TPPNAME="cmp_ge" IS_CMP=1 ;;
    29) TPPNAME="cmp_lt" IS_CMP=1 ;;
    30) TPPNAME="cmp_le" IS_CMP=1 ;;
    31) TPPNAME="cmp_eq" IS_CMP=1 ;;
    32) TPPNAME="cmp_ne" IS_CMP=1 ;;
    *) continue ;;
  esac

  for ROUND in 'rne' 'stoch'; do
    PRECLC=${PRECLC_BASE}
    PRECSCRIPT=${PREC}
    RMODE=0

    if ((IS_CMP)); then
      # compare ops are generated for 'rne' only
      if [[ "$ROUND" == 'stoch' ]]; then
        continue
      fi
      if [[ "$PREC_IN0" == 'f64' || "$PREC_IN1" == 'f64' ]]; then
        continue
      fi
      # avoiding race condition when writing files with the same filename
      # and precision: the name below depends on the input types only, so
      # just the instance whose out/comp fields are both F32 may write it
      if [[ "$PRECH_OUT" != 'F32' || "$PRECH_COMP" != 'F32' ]]; then
        continue
      fi
      PRECLC=${PREC_IN0}_${PREC_IN1}_implicit_f32
      PRECSCRIPT=${PRECH_IN0}_${PRECH_IN1}_IMPLICIT_F32
    elif [[ "$ROUND" == 'stoch' ]]; then
      # 'stoch' variants are generated for bf8 output only
      if [[ "$PREC_OUT" != 'bf8' ]]; then
        continue
      fi
      RMODE=1
      PRECLC=${PREC_IN0}_${PREC_IN1}_${PREC_OUT}${ROUND}_${PREC_COMP}
    fi

    for LD in 'eqld' 'gtld'; do
      OUTNAME="${HERE}/binary_${TPPNAME}_${PRECLC}_${LD}.sh"

      # generate script by sed (one invocation, expressions applied in order)
      SED_ARGS=(
        -e "s/PREC=0/PREC=\"${PRECSCRIPT}\"/g"
        -e "s/BINARY_OP=0/BINARY_OP=${TYPE}/g"
        -e "s/RMODE=0/RMODE=${RMODE}/g"
        -e "s/SAMPLESIZE/${SAMPLESIZE}/g"
      )

      # for gt we need to touch up the script
      if [[ "$LD" == 'gtld' ]]; then
        SED_ARGS+=(-e "s/+ str(m) + '_' + str(m)/+ '100_100'/g")
      fi

      sed "${SED_ARGS[@]}" "${HERE}/binary.tpl" >"${OUTNAME}"
      chmod 755 "${OUTNAME}"
    done
  done
done
66 changes: 10 additions & 56 deletions samples/eltwise/kernel_test/generate_ternary_test_scripts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,63 +2,17 @@

HERE=$(cd "$(dirname "$0")" && pwd -P)

if [[ -z "${SSIZE}" ]]; then
SAMPLESIZE=10
else
SAMPLESIZE=${SSIZE}
fi

TMPFILE=$(mktemp)
trap 'rm ${TMPFILE}' EXIT
echo "#!/bin/bash" > generate_ternary_test_scripts_gen_parallel.sh
echo "" >> generate_ternary_test_scripts_gen_parallel.sh

for PREC in 'F32_F32_IMPLICIT_F32_F32' 'BF16_BF16_IMPLICIT_BF16_F32' 'F16_F16_IMPLICIT_F16_F32' 'BF8_BF8_IMPLICIT_BF8_F32' 'HF8_HF8_IMPLICIT_HF8_F32'; do
for TYPE in 1; do
for ROUND in 'rne' 'stoch'; do
for LD in 'eqld' 'gtld'; do
TPPNAME="none"
OUTNAME="${HERE}/ternary_"
PRECLC=$(echo "$PREC" | awk '{print tolower($0)}')
RMODE=0

# get TPP name
if [ "$TYPE" == '1' ] ; then
TPPNAME="select"
else
continue
fi

if [ "$ROUND" == 'stoch' ]; then
PREC_OUT=$(echo "$PRECLC" | awk -F"_" '{print $4}')
if [ "$PREC_OUT" == 'bf8' ] ; then
PREC_IN0=$(echo "$PRECLC" | awk -F"_" '{print $1}')
PREC_IN1=$(echo "$PRECLC" | awk -F"_" '{print $2}')
PREC_IN2=$(echo "$PRECLC" | awk -F"_" '{print $3}')
PREC_COMP=$(echo "$PRECLC" | awk -F"_" '{print $5}')
PREC_OUT=${PREC_OUT}${ROUND}
RMODE=1
PRECLC=${PREC_IN0}_${PREC_IN1}_${PREC_IN2}_${PREC_OUT}_${PREC_COMP}
else
continue
fi
fi

OUTNAME=${OUTNAME}${TPPNAME}_${PRECLC}_${LD}.sh

# generate script by sed
sed "s/PREC=0/PREC=\"${PREC}\"/g" ${HERE}/ternary.tpl \
| sed "s/TERNARY_OP=0/TERNARY_OP=${TYPE}/g" \
| sed "s/RMODE=0/RMODE=${RMODE}/g" \
| sed "s/SAMPLESIZE/${SAMPLESIZE}/g" \
>${OUTNAME}
sed "s/PRECDESC/${PREC}/g" generate_ternary_test_scripts.tpl > generate_ternary_test_scripts_gen_$PREC.sh
chmod 755 generate_ternary_test_scripts_gen_$PREC.sh
echo "./generate_ternary_test_scripts_gen_$PREC.sh &" >> generate_ternary_test_scripts_gen_parallel.sh
done

# for gt we need to touch up the script
if [ "$LD" == 'gtld' ] ; then
sed "s/+ str(m) + '_' + str(m)/+ '100_100'/g" ${OUTNAME} >${TMPFILE}
cp ${TMPFILE} ${OUTNAME}
fi
echo "wait" >> generate_ternary_test_scripts_gen_parallel.sh
echo "sync" >> generate_ternary_test_scripts_gen_parallel.sh
chmod 755 generate_ternary_test_scripts_gen_parallel.sh

chmod 755 ${OUTNAME}
done
done
done
done
./generate_ternary_test_scripts_gen_parallel.sh
Loading

0 comments on commit e9170d0

Please sign in to comment.