[X86][AVX] Add VMOVDDUP-VPBROADCASTQ execution domain mapping
Noticed in D57514.

Differential Revision: https://reviews.llvm.org/D57519

llvm-svn: 352922
RKSimon committed Feb 1, 2019
1 parent 33706e3 commit e95550f
Showing 27 changed files with 261 additions and 298 deletions.
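Background, not part of the commit: VMOVDDUP and the XMM form of VPBROADCASTQ both splat the low 64-bit element of their source, but VMOVDDUP executes in the floating-point domain while VPBROADCASTQ executes in the integer domain. Mapping them as equivalents lets the execution-domain fixing pass pick whichever form avoids a domain-crossing bypass delay around the surrounding code. The C++ sketch below is an illustration added here (compile with -mavx2 and run on an AVX2-capable machine); it checks that the two splats really are bit-for-bit identical:

// Standalone demonstration -- not from the commit. VMOVDDUP (FP domain)
// and VPBROADCASTQ xmm (integer domain) both splat the low 64-bit lane.
#include <immintrin.h>
#include <cstring>

int main() {
  long long data[2] = {0x1122334455667788LL, 0x0AABBCCDDEEFF001LL};

  // FP-domain splat: compiles to vmovddup (SSE3 intrinsic).
  __m128d fp = _mm_movedup_pd(_mm_loadu_pd(reinterpret_cast<const double *>(data)));

  // Integer-domain splat: compiles to vpbroadcastq (AVX2 intrinsic).
  __m128i in = _mm_broadcastq_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)));

  // Both registers should hold the same 16 bytes; exit code 0 means equal.
  return std::memcmp(&fp, &in, sizeof(fp)) != 0;
}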
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6040,6 +6040,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
{ X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
+{ X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128r },
+{ X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128m },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
{ X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
@@ -6130,6 +6132,8 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
{ X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
+{ X86::VMOVDDUPrm, X86::VMOVDDUPrm, X86::VPBROADCASTQrm},
+{ X86::VMOVDDUPrr, X86::VMOVDDUPrr, X86::VPBROADCASTQrr},
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
{ X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
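For reference: each row of ReplaceableInstrs and ReplaceableInstrsAVX2 lists the PackedSingle, PackedDouble, and PackedInt forms of one operation, and the domain-fixing pass rewrites an instruction to the column matching the preferred domain; VMOVDDUP occupies both FP columns here because the splat has no distinct single-precision form. Below is a minimal sketch of such a lookup, with made-up placeholder opcode values rather than the real X86:: enums — a simplification, not the exact code in X86InstrInfo.cpp:

#include <cstdint>

// Column index = target execution domain.
enum Domain { PackedSingle = 0, PackedDouble = 1, PackedInt = 2 };

// One row per operation: {PS form, PD form, integer form}.
// Opcode values are placeholders for illustration only.
static const uint16_t ReplaceableTable[][3] = {
    {/*VMOVDDUPrr*/ 100, /*VMOVDDUPrr*/ 100, /*VPBROADCASTQrr*/ 200},
    {/*VMOVDDUPrm*/ 101, /*VMOVDDUPrm*/ 101, /*VPBROADCASTQrm*/ 201},
};

// If Opcode appears anywhere in a row, return its equivalent in the
// requested domain; 0 means the opcode is not remappable.
uint16_t replaceOpcode(uint16_t Opcode, Domain D) {
  for (const auto &Row : ReplaceableTable)
    for (int Col = 0; Col < 3; ++Col)
      if (Row[Col] == Opcode)
        return Row[D];
  return 0;
}

// Usage: moving a register broadcast into the FP domain.
int main() { return replaceOpcode(/*VPBROADCASTQrr*/ 200, PackedDouble) != 100; }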
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -319,7 +319,7 @@ define <4 x i64> @test_mm256_broadcastd_epi32(<4 x i64> %a0) {
define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastq_epi64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %res
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -234,7 +234,7 @@ declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastq_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
ret <2 x i64> %res
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -189,12 +189,12 @@ define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32: ## %bb.0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vpbroadcastq (%eax), %xmm0
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
;
; X64-LABEL: Q64:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: vpbroadcastq (%rdi), %xmm0
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
@@ -233,7 +233,7 @@ define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
;
; X64-LABEL: broadcast_mem_v4i16_v8i16:
; X64: ## %bb.0:
-; X64-NEXT: vpbroadcastq (%rdi), %xmm0
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
%load = load <4 x i16>, <4 x i16>* %ptr
%shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -471,7 +471,7 @@ define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable re
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: vpbroadcastq 8(%rdi), %xmm0
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
@@ -865,12 +865,12 @@ define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
; X32-LABEL: _inreg2xi64:
; X32: ## %bb.0:
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: _inreg2xi64:
; X64: ## %bb.0:
-; X64-NEXT: vpbroadcastq %xmm0, %xmm0
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %b
@@ -1327,19 +1327,19 @@ define void @isel_crash_2q(i64* %cV_R.addr) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vmovaps %xmm0, (%esp)
-; X32-NEXT: vpbroadcastq (%eax), %xmm1
+; X32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp)
+; X32-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
; X32-NEXT: retl
;
; X64-LABEL: isel_crash_2q:
; X64: ## %bb.0: ## %entry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: vpbroadcastq (%rdi), %xmm1
+; X64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
entry:
%__a.addr.i = alloca <2 x i64>, align 16
@@ -2324,7 +2324,7 @@ define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p, <16 x i32> %mask)
define <2 x i64> @test_i64_to_2_mem(i64* %p) {
; CHECK-LABEL: test_i64_to_2_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
%s = load i64, i64* %p
%vec = insertelement <2 x i64> undef, i64 %s, i32 0
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
@@ -6,7 +6,7 @@
define <4 x i32> @test_2xi32_to_4xi32(<4 x i32> %vec) {
; CHECK-LABEL: test_2xi32_to_4xi32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: retq
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x i32> %res
@@ -318,7 +318,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i3
define <4 x i32> @test_2xi32_to_4xi32_mem(<2 x i32>* %vp) {
; CHECK-LABEL: test_2xi32_to_4xi32_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1160,9 +1160,9 @@ define <4 x i32> @test_masked_z_8xi32_to_4xi32_perm_mem_mask2(<8 x i32>* %vp, <4
define <4 x i32> @test_8xi32_to_4xi32_perm_mem_mask3(<8 x i32>* %vp) {
; CHECK-LABEL: test_8xi32_to_4xi32_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,2,3]
-; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm1
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,2,3]
+; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 5, i32 3, i32 2, i32 7>
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
@@ -2104,7 +2104,7 @@ define <4 x i64> @test_mm256_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastq_epi64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %res
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -3910,8 +3910,9 @@ define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
-; X86-NEXT: vpandn %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1]
+; X86-NEXT: vmovddup (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x08]
+; X86-NEXT: # xmm1 = mem[0,0]
+; X86-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmb_128:
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -95,8 +95,7 @@ define <16 x i8> @f16xi8_i64(<16 x i8> %a) {
;
; ALL32-LABEL: f16xi8_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [7.9499288951273625E-275,7.9499288951273625E-275]
-; ALL32-NEXT: # xmm1 = mem[0,0]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7.9499288951273625E-275,7.9499288951273625E-275]
; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
@@ -692,8 +691,7 @@ define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
;
; ALL32-LABEL: f8xi16_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [4.1720559249406128E-309,4.1720559249406128E-309]
-; ALL32-NEXT: # xmm1 = mem[0,0]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.1720559249406128E-309,4.1720559249406128E-309]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
@@ -1147,8 +1145,7 @@ define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
;
; ALL32-LABEL: f4xi32_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [2.1219957909652723E-314,2.1219957909652723E-314]
-; ALL32-NEXT: # xmm1 = mem[0,0]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2.1219957909652723E-314,2.1219957909652723E-314]
; ALL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
@@ -1624,7 +1621,8 @@ define <4 x float> @f4xf32_f64(<4 x float> %a) {
;
; ALL64-LABEL: f4xf32_f64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
+; ALL64-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
+; ALL64-NEXT: # xmm1 = mem[0,0]
; ALL64-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vdivps %xmm0, %xmm1, %xmm0
; ALL64-NEXT: retq
13 changes: 4 additions & 9 deletions llvm/test/CodeGen/X86/insert-loaded-scalar.ll
@@ -178,15 +178,10 @@ define <2 x i64> @load64_ins_eltc_v2i64(i64* %p) nounwind {
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
-; AVX1-LABEL: load64_ins_eltc_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load64_ins_eltc_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: load64_ins_eltc_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX-NEXT: retq
%x = load i64, i64* %p
%ins = insertelement <2 x i64> undef, i64 %x, i32 1
ret <2 x i64> %ins
13 changes: 4 additions & 9 deletions llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -203,15 +203,10 @@ define <2 x i64> @load_i64_v2i64(i64* %p, i32 %y) nounwind {
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_i64_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_i64_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: load_i64_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX-NEXT: retq
%x = load i64, i64* %p
%ins = insertelement <2 x i64> undef, i64 %x, i32 %y
ret <2 x i64> %ins
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1673,7 +1673,7 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; XOP-LABEL: interleave_24i32_in:
; XOP: # %bb.0:
; XOP-NEXT: vmovupd (%rsi), %ymm0
-; XOP-NEXT: vmovupd (%rcx), %ymm1
+; XOP-NEXT: vmovups (%rcx), %ymm1
; XOP-NEXT: vmovups 16(%rcx), %xmm2
; XOP-NEXT: vmovups (%rdx), %xmm3
; XOP-NEXT: vmovups 16(%rdx), %xmm4
@@ -1744,8 +1744,8 @@ define <2 x double> @wrongorder(<4 x double> %A, <8 x double>* %P) #0 {
; AVX2-LABEL: wrongorder:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm1
-; AVX2-NEXT: vmovapd %ymm1, 32(%rdi)
-; AVX2-NEXT: vmovapd %ymm1, (%rdi)
+; AVX2-NEXT: vmovaps %ymm1, 32(%rdi)
+; AVX2-NEXT: vmovaps %ymm1, (%rdi)
; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -5363,15 +5363,15 @@ define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
-; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
+; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_pd1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
-; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
+; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_pd1:
@@ -5385,14 +5385,14 @@ define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
-; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_pd1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
-; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
+; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double * %a0 to <2 x double>*
%shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
@@ -5489,15 +5489,15 @@ define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
-; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
+; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store1_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
-; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
+; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store1_pd:
@@ -5511,14 +5511,14 @@ define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
-; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
+; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store1_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
-; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
+; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double * %a0 to <2 x double>*
%shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
18 changes: 4 additions & 14 deletions llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1562,20 +1562,10 @@ define <4 x i32> @test_2xi32_to_4xi32_mem(<2 x i32>* %vp) {
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
;
-; X64-AVX1-LABEL: test_2xi32_to_4xi32_mem:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: test_2xi32_to_4xi32_mem:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: test_2xi32_to_4xi32_mem:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vpbroadcastq (%rdi), %xmm0
-; X64-AVX512-NEXT: retq
+; X64-LABEL: test_2xi32_to_4xi32_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x i32> %res
(Remaining changed files not shown.)