Skip to content

Commit

Permalink
[Clang][XTHeadVector] Add vector unit-stride segment load/store intrinsics (llvm#56)
Browse files Browse the repository at this point in the history

* [Clang][XTHeadVector] Add `th_vlseg<nf>e<eew>_v`

* [Clang][XTHeadVector] Add `th_vlseg<nf>e<eew>ff_v`

* [Clang][XTHeadVector] Add `th_vlseg<nf><b/h/w>_v`

* [Clang][XTHeadVector] Fix Unit-stride segment load

* [Clang][XTHeadVector] Test Unit-stride segment load

* [Clang][XTHeadVector] Add Unit-stride segment store

* [Clang][XTHeadVector] Test Unit-stride segment store

* [NFC][XTHeadVector] Update README
  • Loading branch information
imkiva authored Jan 19, 2024
1 parent c5122da commit 220cd17
Show file tree
Hide file tree
Showing 61 changed files with 4,344 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ Any feature not listed below but present in the specification should be consider
- (Done) `7.2. Vector Strided Load/Store Operations`
- (Done) `7.3. Vector Indexed Load/Store Operations`
- (Done) `7.4 Unit-stride Fault-Only-First Loads Operations`
- (WIP) `7.5. Vector Load/Store Segment Operations (Zvlsseg)`
- (Done) `7.5.1. Vector Unit-Stride Segment Loads and Stores`

## Q & A

Expand Down
215 changes: 215 additions & 0 deletions clang/include/clang/Basic/riscv_vector_xtheadv.td
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ multiclass RVVIntBinBuiltinSet
: RVVSignedBinBuiltinSet,
RVVUnsignedBinBuiltinSet;

// Segment counts iterated by the segment load/store multiclasses; each value
// becomes the builtin's NF field and part of its name.
defvar NFList = [2, 3, 4, 5, 6, 7, 8];
defvar TypeList = ["c", "s", "i", "l", "x", "f", "d"];
defvar EEWList = [["8", "(Log2EEW:3)"],
["16", "(Log2EEW:4)"],
Expand Down Expand Up @@ -560,6 +561,220 @@ defm th_vle16ff: RVVVLEFFBuiltin<"th_vleff", ["s","x"]>; // i16, f16
defm th_vle32ff: RVVVLEFFBuiltin<"th_vleff", ["i", "f"]>; // i32, f32
defm th_vle64ff: RVVVLEFFBuiltin<"th_vleff", ["l", "d"]>; // i64, f64

// 7.5.1 Vector Unit-stride Segment Loads (Zvlsseg)

// Defines the unit-stride segment load builtins.
//
// For every type code in `types` and every segment count in NFList, a builtin
// named `<ir><nf><bhwe>[<eew>]_v` is created and bound to the IR intrinsic
// `<ir><nf><bhwe>` (unmasked) / `<ir><nf><bhwe>_mask` (masked).
//   ir       - name prefix shared by the builtin and the intrinsic
//   bhwe     - width letter placed right after the segment count
//   with_eew - when set, the per-type width string `eew` is appended to the
//              builtin name (never to the intrinsic name)
//   types    - type codes to instantiate
// An unsigned variant is additionally defined for non-floating-point types.
multiclass RVVUSSegLoad<string ir, string bhwe, bit with_eew, list<string> types> {
  foreach type = types in {
    // Width string used in the builtin name when with_eew is set.
    defvar eew = !cond(!eq(type, "c") : "8",
                       !eq(type, "s") : "16",
                       !eq(type, "i") : "32",
                       !eq(type, "l") : "64",
                       !eq(type, "x") : "16",
                       !eq(type, "f") : "32",
                       !eq(type, "d") : "64");
    foreach nf = NFList in {
      let Name = ir # nf # bhwe # !if(with_eew, eew, "") # "_v",
          IRName = ir # nf # bhwe,
          MaskedIRName = ir # nf # bhwe # "_mask",
          NF = nf,
          ManualCodegen = [{
      {
        // Builtin operands:   [mask,] [passthru_tuple,] ptr, vl
        // Intrinsic operands: passthru0 .. passthru<NF-1>, ptr, [mask,] vl, [policy]
        llvm::Type *ElementVectorType = cast<StructType>(ResultType)->elements()[0];
        IntrinsicTypes = {ElementVectorType, Ops.back()->getType()};
        SmallVector<llvm::Value*, 12> Operands;

        // No passthru tuple is read from the builtin operands when the policy
        // is tail-agnostic (and, for masked forms, also mask-agnostic).
        bool NoPassthru =
          (IsMasked && (PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA)) ||
          (!IsMasked && (PolicyAttrs & RVV_VTA));
        // Index of the pointer operand: skip the mask (if any) and the
        // passthru tuple (if any).
        unsigned Offset = (IsMasked ? 1 : 0) + (NoPassthru ? 0 : 1);

        if (NoPassthru) { // Push poison into passthru
          Operands.append(NF, llvm::PoisonValue::get(ElementVectorType));
        } else { // Push intrinsics operands into passthru
          llvm::Value *PassthruOperand = IsMasked ? Ops[1] : Ops[0];
          for (unsigned I = 0; I < NF; ++I)
            Operands.push_back(Builder.CreateExtractValue(PassthruOperand, {I}));
        }

        Operands.push_back(Ops[Offset]); // Ptr
        if (IsMasked)
          Operands.push_back(Ops[0]); // Mask
        Operands.push_back(Ops[Offset + 1]); // VL
        if (IsMasked)
          Operands.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs));

        llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);

        llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
        // Either hand the tuple back directly or store it into the
        // caller-provided return slot.
        if (ReturnValue.isNull())
          return LoadValue;
        else
          return Builder.CreateStore(LoadValue, ReturnValue.getValue());
      }
      }] in {
        defvar T = "(Tuple:" # nf # ")";
        def : RVVBuiltin<T # "v", T # "vPCe", type>;
        if !not(IsFloat<type>.val) then {
          def : RVVBuiltin<T # "Uv", T # "UvPCUe", type>;
        }
      }
    }
  }
}

// 7.5.1 Vector Unit-stride Segment Stores (Zvlsseg)
// Defines the unit-stride segment store builtins `<ir><nf><bhwe>[<eew>]_v`
// for each type code in `types` and each segment count in NFList. The
// intrinsic names are `<ir><nf><bhwe>` and `<ir><nf><bhwe>_mask`; `eew` is
// appended to the builtin name only when `with_eew` is set.
multiclass RVVUSSegStore<string ir, string bhwe, bit with_eew, list<string> types> {
  foreach type = types in {
    // Width string used in the builtin name when with_eew is set.
    defvar eew = !cond(!eq(type, "c") : "8",
                       !eq(type, "s") : "16",
                       !eq(type, "i") : "32",
                       !eq(type, "l") : "64",
                       !eq(type, "x") : "16",
                       !eq(type, "f") : "32",
                       !eq(type, "d") : "64");
    foreach nf = NFList in {
      let Name = ir # nf # bhwe # !if(with_eew, eew, "") # "_v",
          IRName = ir # nf # bhwe,
          MaskedIRName = ir # nf # bhwe # "_mask",
          NF = nf,
          HasMaskedOffOperand = false,
          ManualCodegen = [{
      {
        // Operand layout
        //   builtin,   masked:   (mask, ptr, v_tuple, vl)
        //   builtin,   unmasked: (ptr, v_tuple, vl)
        //   intrinsic, masked:   (val0, val1, ..., ptr, mask, vl)
        //   intrinsic, unmasked: (val0, val1, ..., ptr, vl)
        unsigned PtrIdx = IsMasked ? 1 : 0;
        llvm::Value *Tuple = Ops[PtrIdx + 1];

        // Flatten the tuple into one intrinsic operand per field.
        SmallVector<llvm::Value*, 12> Operands;
        for (unsigned Field = 0; Field < NF; ++Field)
          Operands.push_back(Builder.CreateExtractValue(Tuple, {Field}));

        Operands.push_back(Ops[PtrIdx]); // Ptr
        if (IsMasked)
          Operands.push_back(Ops[0]); // Mask
        Operands.push_back(Ops[PtrIdx + 2]); // VL

        // Overload on the field vector type and the VL type.
        IntrinsicTypes = {Operands.front()->getType(), Operands.back()->getType()};
        return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Operands, "");
      }
      }] in {
        defvar T = "(Tuple:" # nf # ")";
        def : RVVBuiltin<T # "v", "0Pe" # T # "v", type>;
        if !not(IsFloat<type>.val) then {
          def : RVVBuiltin<T # "Uv", "0PUe" # T # "Uv", type>;
        }
      }
    }
  }
}

// 7.5.1 Vector Unit-stride Segment Loads Fault-Only-First (Zvlsseg)
// Defines the fault-only-first unit-stride segment load builtins.
//
// For every type code in `types` and every segment count in NFList, a builtin
// named `<ir><nf><bhwe>[<eew>]ff_v` is created and bound to the IR intrinsic
// `<ir><nf><bhwe>ff` (unmasked) / `<ir><nf><bhwe>ff_mask` (masked).
//   ir       - name prefix shared by the builtin and the intrinsic
//   bhwe     - width letter placed right after the segment count
//   with_eew - when set, the per-type width string `eew` is appended to the
//              builtin name (never to the intrinsic name)
//   types    - type codes to instantiate
// An unsigned variant is additionally defined for non-floating-point types.
multiclass RVVUSSegLoadFF<string ir, string bhwe, bit with_eew, list<string> types> {
  foreach type = types in {
    // Width string used in the builtin name when with_eew is set.
    defvar eew = !cond(!eq(type, "c") : "8",
                       !eq(type, "s") : "16",
                       !eq(type, "i") : "32",
                       !eq(type, "l") : "64",
                       !eq(type, "x") : "16",
                       !eq(type, "f") : "32",
                       !eq(type, "d") : "64");
    foreach nf = NFList in {
      // MaskedIRName is derived from bhwe, like IRName, instead of a
      // hard-coded "e"; for bhwe = "e" the resulting name is unchanged.
      let Name = ir # nf # bhwe # !if(with_eew, eew, "") # "ff_v",
          IRName = ir # nf # bhwe # "ff",
          MaskedIRName = ir # nf # bhwe # "ff_mask",
          NF = nf,
          ManualCodegen = [{
      {
        // Builtin operands:   [mask,] [passthru_tuple,] ptr, new_vl_ptr, vl
        // Intrinsic operands: passthru0 .. passthru<NF-1>, ptr, [mask,] vl, [policy]
        // Intrinsic result:   NF vectors followed by the new vl value.
        llvm::Type *ElementVectorType = cast<StructType>(ResultType)->elements()[0];
        IntrinsicTypes = {ElementVectorType, Ops.back()->getType()};
        SmallVector<llvm::Value*, 12> Operands;

        // No passthru tuple is read from the builtin operands when the policy
        // is tail-agnostic (and, for masked forms, also mask-agnostic).
        bool NoPassthru =
          (IsMasked && (PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA)) ||
          (!IsMasked && (PolicyAttrs & RVV_VTA));
        // Index of the pointer operand: skip the mask (if any) and the
        // passthru tuple (if any).
        unsigned Offset = (IsMasked ? 1 : 0) + (NoPassthru ? 0 : 1);

        if (NoPassthru) { // Push poison into passthru
          Operands.append(NF, llvm::PoisonValue::get(ElementVectorType));
        } else { // Push intrinsics operands into passthru
          llvm::Value *PassthruOperand = IsMasked ? Ops[1] : Ops[0];
          for (unsigned I = 0; I < NF; ++I)
            Operands.push_back(Builder.CreateExtractValue(PassthruOperand, {I}));
        }

        Operands.push_back(Ops[Offset]); // Ptr
        if (IsMasked)
          Operands.push_back(Ops[0]); // Mask
        Operands.push_back(Ops[Offset + 2]); // vl (Ops[Offset + 1] is the new_vl pointer)
        if (IsMasked)
          Operands.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs));

        llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);

        llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
        // Natural alignment of the pointee of the new_vl pointer argument
        clang::CharUnits Align =
            CGM.getNaturalPointeeTypeAlignment(E->getArg(Offset + 1)->getType());

        // Rebuild the NF-field result tuple from the first NF fields of the
        // intrinsic result.
        llvm::Value *ReturnTuple = llvm::PoisonValue::get(ResultType);
        for (unsigned I = 0; I < NF; ++I) {
          llvm::Value *V = Builder.CreateExtractValue(LoadValue, {I});
          ReturnTuple = Builder.CreateInsertValue(ReturnTuple, V, {I});
        }

        // Store new_vl (field NF of the intrinsic result) through the pointer
        llvm::Value *V = Builder.CreateExtractValue(LoadValue, {NF});
        Builder.CreateStore(V, Address(Ops[Offset + 1], V->getType(), Align));

        if (ReturnValue.isNull())
          return ReturnTuple;
        else
          return Builder.CreateStore(ReturnTuple, ReturnValue.getValue());
      }
      }] in {
        defvar T = "(Tuple:" # nf # ")";
        def : RVVBuiltin<T # "v", T # "vPCePz", type>;
        if !not(IsFloat<type>.val) then {
          def : RVVBuiltin<T # "Uv", T # "UvPCUePz", type>;
        }
      }
    }
  }
}

// 7.5. Vector Load/Store Segment Operations (Zvlsseg)

// Instantiates the segment load builtins. All of them are tuple-typed
// (IsTuple) and their unmasked forms use the HasPassthruOperand policy scheme.
let UnMaskedPolicyScheme = HasPassthruOperand,
IsTuple = true in {
// Unit-stride segment load
// The "b"/"h"/"w" forms pass with_eew = 0, so no width is added to the builtin
// name; the "e" forms pass with_eew = 1.
// NOTE(review): the "b"/"h"/"w" forms are instantiated for every TypeList
// entry, including the floating-point codes and "c" — confirm this is intended.
defm : RVVUSSegLoad<"th_vlseg", "b", 0, TypeList>;
defm : RVVUSSegLoad<"th_vlseg", "h", 0, TypeList>;
defm : RVVUSSegLoad<"th_vlseg", "w", 0, TypeList>;
defm : RVVUSSegLoad<"th_vlseg", "e", 1, TypeList>;
// Fault-only-first form of the "e" load.
defm : RVVUSSegLoadFF<"th_vlseg", "e", 1, TypeList>;

// TODO: strided segment load
// NOTE(review): this TODO was labelled "indexed", but th_vlsseg /
// RVVSSegLoadEEW look like the strided form — confirm.
// defm : RVVSSegLoadEEW<"th_vlsseg", TypeList>;
}

// Instantiates the segment store builtins. They are tuple-typed (IsTuple) and
// use NonePolicy for both the unmasked and the masked forms.
let UnMaskedPolicyScheme = NonePolicy,
MaskedPolicyScheme = NonePolicy,
IsTuple = true in {
// Unit-stride segment store: "b"/"h"/"w" pass with_eew = 0, "e" passes
// with_eew = 1.
defm : RVVUSSegStore<"th_vsseg", "b", 0, TypeList>;
defm : RVVUSSegStore<"th_vsseg", "h", 0, TypeList>;
defm : RVVUSSegStore<"th_vsseg", "w", 0, TypeList>;
defm : RVVUSSegStore<"th_vsseg", "e", 1, TypeList>;

// TODO: strided segment store
// NOTE(review): this TODO was labelled "indexed", but th_vssseg /
// RVVSSegStoreEEW look like the strided form — confirm.
// defm : RVVSSegStoreEEW<"th_vssseg", TypeList>;
}

//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Operations
//===----------------------------------------------------------------------===//
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadvector \
// RUN: -disable-O0-optnone -emit-llvm %s -o - | \
// RUN: opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

#include <riscv_vector.h>

// CHECK-RV64-LABEL: define dso_local { <vscale x 8 x i8>, <vscale x 8 x i8> } @test_th_vlseg2b_v_i8m1x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.riscv.th.vlseg2b.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 8 x i8>, <vscale x 8 x i8> } [[TMP0]]
//
// Unmasked 2-field "b" segment load into a vint8m1x2_t tuple; the expected IR
// calls llvm.riscv.th.vlseg2b with poison passthru operands.
vint8m1x2_t test_th_vlseg2b_v_i8m1x2(const int8_t *base, size_t vl) {
return __riscv_th_vlseg2b_v_i8m1x2(base, vl);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadvector \
// RUN: -disable-O0-optnone -emit-llvm %s -o - | \
// RUN: opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

#include <riscv_vector.h>

// CHECK-RV64-LABEL: define dso_local { <vscale x 4 x half>, <vscale x 4 x half> } @test_th_vlseg2e16_v_f16m1x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x half>, <vscale x 4 x half> } @llvm.riscv.th.vlseg2e.nxv4f16.i64(<vscale x 4 x half> poison, <vscale x 4 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 4 x half>, <vscale x 4 x half> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of _Float16 data into a vfloat16m1x2_t
// tuple; the expected IR uses the nxv4f16 overload with poison passthru.
vfloat16m1x2_t test_th_vlseg2e16_v_f16m1x2(const _Float16 *base, size_t vl) {
return __riscv_th_vlseg2e16_v_f16m1x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half> } @test_th_vlseg2e16_v_f16m2x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.riscv.th.vlseg2e.nxv8f16.i64(<vscale x 8 x half> poison, <vscale x 8 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of _Float16 data into a vfloat16m2x2_t
// tuple; the expected IR uses the nxv8f16 overload with poison passthru.
vfloat16m2x2_t test_th_vlseg2e16_v_f16m2x2(const _Float16 *base, size_t vl) {
return __riscv_th_vlseg2e16_v_f16m2x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 16 x half>, <vscale x 16 x half> } @test_th_vlseg2e16_v_f16m4x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.riscv.th.vlseg2e.nxv16f16.i64(<vscale x 16 x half> poison, <vscale x 16 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 16 x half>, <vscale x 16 x half> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of _Float16 data into a vfloat16m4x2_t
// tuple; the expected IR uses the nxv16f16 overload with poison passthru.
vfloat16m4x2_t test_th_vlseg2e16_v_f16m4x2(const _Float16 *base, size_t vl) {
return __riscv_th_vlseg2e16_v_f16m4x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 4 x i16>, <vscale x 4 x i16> } @test_th_vlseg2e16_v_i16m1x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.riscv.th.vlseg2e.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 4 x i16>, <vscale x 4 x i16> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of int16_t data into a vint16m1x2_t
// tuple; the expected IR uses the nxv4i16 overload with poison passthru.
vint16m1x2_t test_th_vlseg2e16_v_i16m1x2(const int16_t *base, size_t vl) {
return __riscv_th_vlseg2e16_v_i16m1x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_th_vlseg2e16_v_i16m2x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.riscv.th.vlseg2e.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of int16_t data into a vint16m2x2_t
// tuple; the expected IR uses the nxv8i16 overload with poison passthru.
vint16m2x2_t test_th_vlseg2e16_v_i16m2x2(const int16_t *base, size_t vl) {
return __riscv_th_vlseg2e16_v_i16m2x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 16 x i16>, <vscale x 16 x i16> } @test_th_vlseg2e16_v_i16m4x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.riscv.th.vlseg2e.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 16 x i16>, <vscale x 16 x i16> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of int16_t data into a vint16m4x2_t
// tuple; the expected IR uses the nxv16i16 overload with poison passthru.
vint16m4x2_t test_th_vlseg2e16_v_i16m4x2(const int16_t *base, size_t vl) {
return __riscv_th_vlseg2e16_v_i16m4x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 4 x i16>, <vscale x 4 x i16> } @test_th_vlseg2e16_v_u16m1x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.riscv.th.vlseg2e.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 4 x i16>, <vscale x 4 x i16> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of uint16_t data into a vuint16m1x2_t
// tuple; the expected IR uses the same nxv4i16 overload as the signed test.
vuint16m1x2_t test_th_vlseg2e16_v_u16m1x2(const uint16_t *base, size_t vl) {
return __riscv_th_vlseg2e16_v_u16m1x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_th_vlseg2e16_v_u16m2x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.riscv.th.vlseg2e.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of uint16_t data into a vuint16m2x2_t
// tuple; the expected IR uses the same nxv8i16 overload as the signed test.
vuint16m2x2_t test_th_vlseg2e16_v_u16m2x2(const uint16_t *base, size_t vl) {
return __riscv_th_vlseg2e16_v_u16m2x2(base, vl);
}

// CHECK-RV64-LABEL: define dso_local { <vscale x 16 x i16>, <vscale x 16 x i16> } @test_th_vlseg2e16_v_u16m4x2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.riscv.th.vlseg2e.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: ret { <vscale x 16 x i16>, <vscale x 16 x i16> } [[TMP0]]
//
// Unmasked 2-field e16 segment load of uint16_t data into a vuint16m4x2_t
// tuple; the expected IR uses the same nxv16i16 overload as the signed test.
vuint16m4x2_t test_th_vlseg2e16_v_u16m4x2(const uint16_t *base, size_t vl) {
return __riscv_th_vlseg2e16_v_u16m4x2(base, vl);
}
Loading

0 comments on commit 220cd17

Please sign in to comment.