[LoongArch] Add patterns for vstelm instructions #139201

Merged: 6 commits, May 21, 2025

26 changes: 20 additions & 6 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -300,17 +300,31 @@ def simm5 : Operand<GRLenVT> {
let DecoderMethod = "decodeSImmOperand<5>";
}

def simm8 : Operand<GRLenVT> {
def simm8 : Operand<GRLenVT>,
ImmLeaf<GRLenVT, [{return isInt<8>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<8>;
let DecoderMethod = "decodeSImmOperand<8>";
}

foreach I = [1, 2, 3] in {
def simm8_lsl # I : Operand<GRLenVT> {
let ParserMatchClass = SImmAsmOperand<8, "lsl" # I>;
let EncoderMethod = "getImmOpValueAsr<" # I # ">";
let DecoderMethod = "decodeSImmOperand<8," # I # ">";
def simm8_lsl1 : Operand<GRLenVT>,
ImmLeaf<GRLenVT, [{return isShiftedInt<8,1>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<8, "lsl1">;
let EncoderMethod = "getImmOpValueAsr<1>";
let DecoderMethod = "decodeSImmOperand<8, 1>";
}

def simm8_lsl2 : Operand<GRLenVT>,
ImmLeaf<GRLenVT, [{return isShiftedInt<8,2>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<8, "lsl2">;
let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<8, 2>";
}

def simm8_lsl3 : Operand<GRLenVT>,
ImmLeaf<GRLenVT, [{return isShiftedInt<8,3>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<8, "lsl3">;
let EncoderMethod = "getImmOpValueAsr<3>";
let DecoderMethod = "decodeSImmOperand<8, 3>";
}

def simm9_lsl3 : Operand<GRLenVT>,
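
The simm8 and simm8_lslN operands above now double as ImmLeaf pattern fragments, so instruction selection can check that a store's byte offset both fits the 8-bit field and is aligned to the element size before folding it into the scaled immediate. A minimal sketch of an offset that should qualify, assuming an LSX-enabled target (e.g. llc -mtriple=loongarch64 -mattr=+lsx); the function name and the expected assembly in the comments are illustrative, not taken from the patch:

; The byte offset 254 is a multiple of 2 and satisfies isShiftedInt<8, 1>, so it
; fits the si8<<1 field used by vstelm.h; 255, or anything beyond 254, would not.
define void @store_lane_imm(<8 x i16> %v, ptr %p) nounwind {
  %addr = getelementptr inbounds i8, ptr %p, i64 254
  %e = extractelement <8 x i16> %v, i64 3
  store i16 %e, ptr %addr               ; expected (illustrative): vstelm.h $vr0, $a0, 254, 3
  ret void
}
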
8 changes: 8 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1760,6 +1760,14 @@ def : Pat<(lasxsplatf32 FPR32:$fj),
def : Pat<(lasxsplatf64 FPR64:$fj),
(XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;

// VSTELM
defm : VstelmPat<truncstorei8, v32i8, XVSTELM_B, simm8, uimm5>;
defm : VstelmPat<truncstorei16, v16i16, XVSTELM_H, simm8_lsl1, uimm4>;
defm : VstelmPat<truncstorei32, v8i32, XVSTELM_W, simm8_lsl2, uimm3>;
defm : VstelmPat<store, v4i64, XVSTELM_D, simm8_lsl3, uimm2>;
defm : VstelmPat<store, v8f32, XVSTELM_W, simm8_lsl2, uimm3, f32>;
defm : VstelmPat<store, v4f64, XVSTELM_D, simm8_lsl3, uimm2, f64>;

// Loads/Stores
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
defm : LdPat<load, XVLD, vt>;
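
These defm lines instantiate VstelmPat (defined in LoongArchLSXInstrInfo.td below) for the 256-bit types, so storing a single extracted lane selects one xvstelm instruction instead of a pickve2gr/move/store sequence. A hedged sketch in the spirit of the updated extractelement tests; the lane and register names are illustrative:

; Extracting lane 2 of a <4 x double> and storing it should now select a single
; xvstelm.d rather than xvpickve2gr.d + movgr2fr.d + fst.d.
define void @store_lane_4xf64(ptr %src, ptr %dst) nounwind {
  %v = load <4 x double>, ptr %src
  %e = extractelement <4 x double> %v, i64 2
  store double %e, ptr %dst             ; expected (illustrative): xvstelm.d $xr0, $a1, 0, 2
  ret void
}
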
21 changes: 21 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1451,6 +1451,20 @@ multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
(Inst BaseAddr:$rj, ImmOpnd:$imm)>;
}

multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
Operand ImmOpnd, Operand IdxOpnd, ValueType elt = i64> {
def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)), BaseAddr:$rj),
(Inst vt:$vd, BaseAddr:$rj, 0, IdxOpnd:$idx)>;

def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)),
(AddrConstant GPR:$rj, ImmOpnd:$imm)),
(Inst vt:$vd, GPR:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;

def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)),
(AddLike BaseAddr:$rj, ImmOpnd:$imm)),
(Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
}

let Predicates = [HasExtLSX] in {

// VADD_{B/H/W/D}
@@ -1944,6 +1958,13 @@ def : Pat<(lsxsplatf32 FPR32:$fj),
def : Pat<(lsxsplatf64 FPR64:$fj),
(VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;

defm : VstelmPat<truncstorei8, v16i8, VSTELM_B, simm8, uimm4>;
defm : VstelmPat<truncstorei16, v8i16, VSTELM_H, simm8_lsl1, uimm3>;
defm : VstelmPat<truncstorei32, v4i32, VSTELM_W, simm8_lsl2, uimm2>;
defm : VstelmPat<store, v2i64, VSTELM_D, simm8_lsl3, uimm1>;
defm : VstelmPat<store, v4f32, VSTELM_W, simm8_lsl2, uimm2, f32>;
defm : VstelmPat<store, v2f64, VSTELM_D, simm8_lsl3, uimm1, f64>;

// Loads/Stores
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LdPat<load, VLD, vt>;
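
As with the LASX block above, these instantiations let a scalar store of an extracted LSX lane select vstelm directly. The multiclass covers three address shapes: a bare base register, a constant base plus immediate (AddrConstant), and an add that behaves like base plus immediate (AddLike). A minimal sketch of the first and last shapes, assuming -mattr=+lsx; the expected assembly comments are illustrative:

; Lane store through a bare base register.
define void @store_lane_base(<4 x i32> %v, ptr %p) nounwind {
  %e = extractelement <4 x i32> %v, i64 1
  store i32 %e, ptr %p                  ; expected: vstelm.w $vr0, $a0, 0, 1
  ret void
}

; Lane store where the constant byte offset (a multiple of 4 within si8<<2)
; should fold into the immediate operand.
define void @store_lane_base_imm(<4 x i32> %v, ptr %p) nounwind {
  %addr = getelementptr inbounds i8, ptr %p, i64 8
  %e = extractelement <4 x i32> %v, i64 1
  store i32 %e, ptr %addr               ; expected (illustrative): vstelm.w $vr0, $a0, 8, 1
  ret void
}
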
39 changes: 38 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -140,7 +140,44 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

bool FrameRegIsKill = false;

if (!isInt<12>(Offset.getFixed())) {
int FixedOffset = Offset.getFixed();
bool OffsetLegal = true;

// Handle offsets that exceed the immediate range of the instruction.
switch (MIOpc) {
case LoongArch::VSTELM_B:
case LoongArch::XVSTELM_B:
OffsetLegal = isInt<8>(FixedOffset);
break;
case LoongArch::VSTELM_H:
case LoongArch::XVSTELM_H:
OffsetLegal = isShiftedInt<8, 1>(FixedOffset);
break;
case LoongArch::VSTELM_W:
case LoongArch::XVSTELM_W:
OffsetLegal = isShiftedInt<8, 2>(FixedOffset);
break;
case LoongArch::VSTELM_D:
case LoongArch::XVSTELM_D:
OffsetLegal = isShiftedInt<8, 3>(FixedOffset);
break;
}

if (!OffsetLegal && isInt<12>(FixedOffset)) {
unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;

// The offset fits in si12 but is not legal for the instruction,
// so use only one scratch register instead.
Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(Addi), ScratchReg)
.addReg(FrameReg)
.addImm(FixedOffset);
Offset = StackOffset::getFixed(0);
FrameReg = ScratchReg;
FrameRegIsKill = true;
}

if (!isInt<12>(FixedOffset)) {
unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
unsigned Add = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;

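
The new check matters because the vstelm offset field is only a scaled si8, much narrower than the si12 offset of ordinary stores. When a frame offset fits si12 but not the scaled si8, the added branch folds the whole offset into one ADDI-created scratch register and leaves the instruction's immediate at 0, instead of falling through to the existing ADDI+ADD sequence reserved for offsets beyond si12. A hedged sketch of input that should take this path (the exact frame offset and scratch register depend on frame lowering):

; Hypothetical variant of the new eliminate_frame_index tests: the store lands
; roughly 1020 bytes into the frame, a multiple of 4 that fits si12 but whose
; scaled value (1020 >> 2 = 255) is outside si8, so a single scratch addi.d is
; expected, with the vstelm.w offset kept at 0.
define void @spill_lane_large_offset(<4 x i32> %a) nounwind {
  %buf = alloca [512 x i32]
  %slot = getelementptr inbounds i8, ptr %buf, i64 1020
  %e = extractelement <4 x i32> %a, i64 1
  store i32 %e, ptr %slot               ; expected (illustrative):
                                        ;   addi.d $a0, $sp, <offset>
                                        ;   vstelm.w $vr0, $a0, 0, 1
  ret void
}
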
35 changes: 21 additions & 14 deletions llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -5,8 +5,7 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_32xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
; CHECK-NEXT: st.b $a0, $a1, 0
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 1
@@ -18,8 +17,7 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_16xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 1
@@ -31,8 +29,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 1
@@ -44,8 +41,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 1
@@ -57,9 +53,7 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 7
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%e = extractelement <8 x float> %v, i32 7
@@ -71,9 +65,7 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 3
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 3
@@ -230,3 +222,18 @@ define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
store double %e, ptr %dst
ret void
}

define void @eliminate_frame_index(<8 x i32> %a) nounwind {
; CHECK-LABEL: eliminate_frame_index:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -1040
; CHECK-NEXT: addi.d $a0, $sp, 524
; CHECK-NEXT: xvstelm.w $xr0, $a0, 0, 1
; CHECK-NEXT: addi.d $sp, $sp, 1040
; CHECK-NEXT: ret
%1 = alloca [32 x [8 x i32]]
%2 = getelementptr i8, ptr %1, i64 508
%b = extractelement <8 x i32> %a, i64 1
store i32 %b, ptr %2
ret void
}
33 changes: 21 additions & 12 deletions llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -5,8 +5,7 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_16xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
; CHECK-NEXT: st.b $a0, $a1, 0
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 1
@@ -18,8 +17,7 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 1
@@ -31,8 +29,7 @@ define void @extract_4xi32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 1
@@ -44,8 +41,7 @@ define void @extract_2xi64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_2xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%e = extractelement <2 x i64> %v, i32 1
@@ -57,8 +53,7 @@ define void @extract_4xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
%e = extractelement <4 x float> %v, i32 1
@@ -70,8 +65,7 @@ define void @extract_2xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_2xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
%e = extractelement <2 x double> %v, i32 1
@@ -168,3 +162,18 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
store double %e, ptr %dst
ret void
}

define void @eliminate_frame_index(<4 x i32> %a) nounwind {
; CHECK-LABEL: eliminate_frame_index:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -1040
; CHECK-NEXT: addi.d $a0, $sp, 524
; CHECK-NEXT: vstelm.w $vr0, $a0, 0, 1
; CHECK-NEXT: addi.d $sp, $sp, 1040
; CHECK-NEXT: ret
%1 = alloca [64 x [4 x i32]]
%2 = getelementptr i8, ptr %1, i64 508
%b = extractelement <4 x i32> %a, i64 1
store i32 %b, ptr %2
ret void
}
30 changes: 10 additions & 20 deletions llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -6,8 +6,7 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i32>
@@ -22,8 +21,7 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
; CHECK-NEXT: vpickve2gr.w $a0, $vr1, 0
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i16>
@@ -38,8 +36,7 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i8>
@@ -52,8 +49,7 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i16>
@@ -68,8 +64,7 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i8>
@@ -82,8 +77,7 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %ptr
%trunc = trunc <8 x i16> %a to <8 x i8>
@@ -97,8 +91,7 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i16>
@@ -114,8 +107,7 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i8>
@@ -129,8 +121,7 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %ptr
%trunc = trunc <4 x i16> %a to <4 x i8>
@@ -144,8 +135,7 @@ define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %ptr
%trunc = trunc <2 x i16> %a to <2 x i8>