[MC][X86] addConstantComments - add mul vXi16 comments
Based on feedback from #95403: we use multiply-by-constant for various lowerings (shifts, division, etc.), so it's very useful to print out the constants to help understand the transform involved.

vXi16 multiplies are the easiest to add for this initial commit, but we can add other arithmetic instructions as follow-ups when the need arises (I intend to add PMADDUBSW handling for #95403 next).

I've done my best to update all the test checks, but there are bound to be some that were missed and will only show up when the files are regenerated.
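
As an aside (not part of the commit), here is why the printed constants are handy: the [25645,25645,...] comment that now appears on the vpmulhuw in combine-udiv.ll below is the magic multiplier for an unsigned divide, and the pmulhuw/psubw/psrlw/paddw sequence can be checked in a few lines of standalone C++. The divisor (23), the final shift amount (4, truncated out of the diff), and the helper name are inferred for illustration, not taken from the test IR.

// Standalone sketch of one lane of combine_vec_udiv_uniform's lowering.
// Assumes the divisor is 23 and the final (truncated) shift is psrlw $4;
// both are inferred from the printed constant, not read from the test.
#include <cassert>
#include <cstdint>

static uint16_t udiv_by_23(uint16_t x) {
  uint16_t t = (uint16_t)(((uint32_t)x * 25645) >> 16); // pmulhuw # [25645,...]
  uint16_t u = (uint16_t)((x - t) >> 1);                // psubw + psrlw $1
  return (uint16_t)((u + t) >> 4);                      // paddw + psrlw $4
}

int main() {
  for (uint32_t x = 0; x <= 0xFFFF; ++x)
    assert(udiv_by_23((uint16_t)x) == x / 23);
  return 0;
}
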
RKSimon committed Jun 14, 2024
1 parent 153fca5 commit 74fe1da
Showing 56 changed files with 972 additions and 924 deletions.
39 changes: 39 additions & 0 deletions llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1898,6 +1898,45 @@ static void addConstantComments(const MachineInstr *MI,
break;
}

#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \
case X86::Prefix##Instr##Suffix##rm##Postfix:

#define CASE_ARITH_RM(Instr) \
INSTR_CASE(, Instr, , ) /* SSE */ \
INSTR_CASE(V, Instr, , ) /* AVX-128 */ \
INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \
INSTR_CASE(V, Instr, Z128, ) \
INSTR_CASE(V, Instr, Z128, k) \
INSTR_CASE(V, Instr, Z128, kz) \
INSTR_CASE(V, Instr, Z256, ) \
INSTR_CASE(V, Instr, Z256, k) \
INSTR_CASE(V, Instr, Z256, kz) \
INSTR_CASE(V, Instr, Z, ) \
INSTR_CASE(V, Instr, Z, k) \
INSTR_CASE(V, Instr, Z, kz)

// TODO: Add additional instructions when useful.
CASE_ARITH_RM(PMADDWD)
CASE_ARITH_RM(PMULLW)
CASE_ARITH_RM(PMULHW)
CASE_ARITH_RM(PMULHUW)
CASE_ARITH_RM(PMULHRSW) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
    if (C->getType()->getScalarSizeInBits() == 16) {
      std::string Comment;
      raw_string_ostream CS(Comment);
      unsigned VectorWidth =
          X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
      CS << "[";
      printConstant(C, VectorWidth, CS);
      CS << "]";
      OutStreamer.AddComment(CS.str());
    }
  }
  break;
}

#define MASK_AVX512_CASE(Instr) \
case Instr: \
case Instr##k: \
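A side note on the macros above (illustration only, not part of the diff): each CASE_ARITH_RM entry fans out across the SSE, AVX and AVX-512 memory-operand forms of an instruction. Hand-expanding CASE_ARITH_RM(PMULLW) and wrapping it in a hypothetical helper gives roughly the following; the X86::* opcode names come from LLVM's generated instruction enum, so this only compiles inside the X86 backend.

// Rough hand-expansion of CASE_ARITH_RM(PMULLW); the helper is hypothetical.
static bool isPMULLWLoadForm(unsigned Opcode) {
  switch (Opcode) {
  case X86::PMULLWrm:        // SSE
  case X86::VPMULLWrm:       // AVX, 128-bit
  case X86::VPMULLWYrm:      // AVX2, 256-bit
  case X86::VPMULLWZ128rm:   // AVX-512VL 128-bit, plus masked (k) and
  case X86::VPMULLWZ128rmk:  // zero-masked (kz) variants
  case X86::VPMULLWZ128rmkz:
  case X86::VPMULLWZ256rm:   // AVX-512VL 256-bit
  case X86::VPMULLWZ256rmk:
  case X86::VPMULLWZ256rmkz:
  case X86::VPMULLWZrm:      // AVX-512BW 512-bit
  case X86::VPMULLWZrmk:
  case X86::VPMULLWZrmkz:
    return true;
  default:
    return false;
  }
}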
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/combine-mul.ll
@@ -543,10 +543,10 @@ define <16 x i8> @PR35579(<16 x i8> %x) {
; SSE: # %bb.0:
; SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,1,2,1,4,1,2,1]
; SSE-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,1,2,1,4,1,2,1]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: packuswb %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
@@ -555,7 +555,7 @@ define <16 x i8> @PR35579(<16 x i8> %x) {
; AVX-LABEL: PR35579:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,1,4,1,2,1,8,1,2,1,4,1,2,1]
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
148 changes: 74 additions & 74 deletions llvm/test/CodeGen/X86/combine-sdiv.ll

Large diffs are not rendered by default.

50 changes: 25 additions & 25 deletions llvm/test/CodeGen/X86/combine-udiv.ll
@@ -471,7 +471,7 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
;
; AVX-LABEL: combine_vec_udiv_uniform:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [25645,25645,25645,25645,25645,25645,25645,25645]
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -480,7 +480,7 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
;
; XOP-LABEL: combine_vec_udiv_uniform:
; XOP: # %bb.0:
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [25645,25645,25645,25645,25645,25645,25645,25645]
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; XOP-NEXT: vpsrlw $1, %xmm0, %xmm0
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
Expand All @@ -500,7 +500,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; SSE2-NEXT: psrlw $3, %xmm3
; SSE2-NEXT: pandn %xmm3, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
; SSE2-NEXT: psubw %xmm1, %xmm0
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: paddw %xmm1, %xmm0
@@ -515,7 +515,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $3, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
; SSE41-NEXT: psubw %xmm1, %xmm0
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: paddw %xmm1, %xmm0
@@ -528,18 +528,18 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4096,2048,8,u,u,2,2,u]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6],xmm0[7]
; AVX-NEXT: retq
;
; XOP-LABEL: combine_vec_udiv_nonuniform:
; XOP: # %bb.0:
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -558,8 +558,8 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pandn %xmm0, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [16393,59919,58255,32787,55189,8197,52429,32789]
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,2048,2048,2,2048,8,2048,2]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -568,22 +568,22 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789]
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,2048,2048,2,2048,8,2048,2]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_udiv_nonuniform2:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789]
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [8,2048,2048,2,2048,8,2048,2]
; AVX-NEXT: retq
;
; XOP-LABEL: combine_vec_udiv_nonuniform2:
; XOP: # %bb.0:
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789]
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
%1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
@@ -598,21 +598,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16384,4096,4096,4096,4096,2048,2048,1024]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_udiv_nonuniform3:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,25645,18351,12137,2115,23705,1041,517]
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16384,4096,4096,4096,4096,2048,2048,1024]
; AVX-NEXT: retq
;
; XOP-LABEL: combine_vec_udiv_nonuniform3:
; XOP: # %bb.0:
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,25645,18351,12137,2115,23705,1041,517]
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; XOP-NEXT: vpsrlw $1, %xmm0, %xmm0
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -687,7 +687,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; SSE2-NEXT: pmulhuw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psubw %xmm1, %xmm2
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [u,32768,0,0,0,0,0,32768]
; SSE2-NEXT: paddw %xmm1, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535]
; SSE2-NEXT: pandn %xmm2, %xmm1
@@ -706,7 +706,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [u,32768,0,0,0,0,0,32768]
; SSE41-NEXT: paddw %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,1024,1024,16,4,1024,u,4096]
; SSE41-NEXT: pmulhuw %xmm2, %xmm1
@@ -716,20 +716,20 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
;
; AVX-LABEL: pr38477:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,4957,57457,4103,16385,35545,2048,2115]
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [u,32768,0,0,0,0,0,32768]
; AVX-NEXT: vpaddw %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,1024,1024,16,4,1024,u,4096]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX-NEXT: retq
;
; XOP-LABEL: pr38477:
; XOP: # %bb.0:
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,4957,57457,4103,16385,35545,2048,2115]
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm2
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [u,32768,0,0,0,0,0,32768]
; XOP-NEXT: vpaddw %xmm1, %xmm2, %xmm1
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/dagcombine-shifts.ll
@@ -394,7 +394,7 @@ define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind {
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,256,128,64,u,u,u,u]
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
%a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -7,7 +7,7 @@ define i32 @mul_4xi8_zc_exceed(<4 x i8> %a, i32 %c) {
; ALL-LABEL: mul_4xi8_zc_exceed:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,1,0,2,0,128,0]
; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; ALL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
@@ -148,7 +148,7 @@ define i32 @mul_4xi8_cs_exceed(<4 x i8> %a, i32 %c) {
; ALL-LABEL: mul_4xi8_cs_exceed:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vpmovsxbd %xmm0, %xmm0
; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,1,0,2,0,256,0]
; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; ALL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -1008,7 +1008,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0
; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,2,u,u,u,u,u,u]
; CHECK-SSE-NEXT: pxor %xmm0, %xmm0
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/freeze-binary.ll
@@ -329,12 +329,12 @@ define i32 @freeze_mul_nsw(i32 %a0) nounwind {
define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind {
; X86-LABEL: freeze_mul_vec:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,6,6,4,4,6,6,4]
; X86-NEXT: retl
;
; X64-LABEL: freeze_mul_vec:
; X64: # %bb.0:
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,6,6,4,4,6,6,4]
; X64-NEXT: retq
%x = mul <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 4, i16 3, i16 2, i16 1>
%y = freeze <8 x i16> %x
@@ -345,14 +345,14 @@ define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind {
define <8 x i16> @freeze_mul_vec_undef(<8 x i16> %a0) nounwind {
; X86-LABEL: freeze_mul_vec_undef:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,3,4,4,3,0,1]
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,3,2,1,1,2,u,4]
; X86-NEXT: retl
;
; X64-LABEL: freeze_mul_vec_undef:
; X64: # %bb.0:
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,3,4,4,3,0,1]
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,3,2,1,1,2,u,4]
; X64-NEXT: retq
%x = mul <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 4, i16 3, i16 undef, i16 1>
%y = freeze <8 x i16> %x