Skip to content

Commit

Permalink
[CIR][CIRGen][Builtin][Neon] Lower neon_vhadd_v and neon_vhaddq_v (#1038)
Browse files Browse the repository at this point in the history
  • Loading branch information
ghehg authored Nov 2, 2024
1 parent aff1719 commit 07deb51
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 96 deletions.
6 changes: 6 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2442,6 +2442,12 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
: "llvm.aarch64.neon.srhadd";
break;
}
case NEON::BI__builtin_neon_vhadd_v:
case NEON::BI__builtin_neon_vhaddq_v: {
  // Halving add: choose the unsigned (uhadd) or signed (shadd) AArch64 NEON
  // intrinsic, mirroring the selection scheme of the neighboring cases.
  if (intrinicId != altLLVMIntrinsic)
    intrincsName = "llvm.aarch64.neon.uhadd";
  else
    intrincsName = "llvm.aarch64.neon.shadd";
  break;
}
case NEON::BI__builtin_neon_vqmovun_v: {
intrincsName = "llvm.aarch64.neon.sqxtun";
argTypes.push_back(builder.getExtendedOrTruncatedElementVectorType(
Expand Down
252 changes: 156 additions & 96 deletions clang/test/CIR/CodeGen/AArch64/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -2568,113 +2568,173 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
// return vcltq_f64(v1, v2);
// }

// NYI-LABEL: @test_vhadd_s8(
// NYI: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// NYI: ret <8 x i8> [[VHADD_V_I]]
// int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
// return vhadd_s8(v1, v2);
// }
// vhadd_s8: signed 8x8-bit halving add; expected to lower to the
// llvm.aarch64.neon.shadd intrinsic (checked by the CIR and LLVM lines below).
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
return vhadd_s8(v1, v2);

// NOTE(review): the NYI-prefixed blocks inside this function reference other
// tests; they look like stale disabled checks interleaved by a diff — confirm
// their intended placement.
// NYI-LABEL: @test_vhadd_s16(
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// NYI: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// NYI: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// NYI: ret <4 x i16> [[VHADD_V2_I]]
// int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
// return vhadd_s16(v1, v2);
// }
// CIR-LABEL: vhadd_s8
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.shadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>

// NYI-LABEL: @test_vhadd_s32(
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// NYI: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// NYI: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// NYI: ret <2 x i32> [[VHADD_V2_I]]
// int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
// return vhadd_s32(v1, v2);
// }
// LLVM: {{.*}}test_vhadd_s8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
// LLVM: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
// LLVM: ret <8 x i8> [[VHADD_V_I]]
}

// NYI-LABEL: @test_vhadd_u8(
// NYI: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// NYI: ret <8 x i8> [[VHADD_V_I]]
// uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
// return vhadd_u8(v1, v2);
// }
// vhadd_s16: signed 4x16-bit halving add; expected to lower to the
// llvm.aarch64.neon.shadd intrinsic (checked by the CIR and LLVM lines below).
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
return vhadd_s16(v1, v2);

// NOTE(review): the NYI-prefixed blocks inside this function reference other
// tests; they look like stale disabled checks interleaved by a diff — confirm
// their intended placement.
// NYI-LABEL: @test_vhadd_u16(
// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// NYI: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// NYI: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// NYI: ret <4 x i16> [[VHADD_V2_I]]
// uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
// return vhadd_u16(v1, v2);
// }
// CIR-LABEL: vhadd_s16
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.shadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>

// NYI-LABEL: @test_vhadd_u32(
// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// NYI: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// NYI: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// NYI: ret <2 x i32> [[VHADD_V2_I]]
// uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
// return vhadd_u32(v1, v2);
// }
// LLVM: {{.*}}test_vhadd_s16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[V1]], <4 x i16> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// LLVM: ret <4 x i16> [[VHADD_V2_I]]
}

// NYI-LABEL: @test_vhaddq_s8(
// NYI: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// NYI: ret <16 x i8> [[VHADDQ_V_I]]
// int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
// return vhaddq_s8(v1, v2);
// }
// vhadd_s32: signed 2x32-bit halving add; expected to lower to the
// llvm.aarch64.neon.shadd intrinsic (checked by the CIR and LLVM lines below).
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
return vhadd_s32(v1, v2);

// NOTE(review): the NYI-prefixed blocks inside this function reference other
// tests; they look like stale disabled checks interleaved by a diff — confirm
// their intended placement.
// NYI-LABEL: @test_vhaddq_s16(
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// NYI: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// NYI: ret <8 x i16> [[VHADDQ_V2_I]]
// int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
// return vhaddq_s16(v1, v2);
// }
// CIR-LABEL: vhadd_s32
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.shadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>

// NYI-LABEL: @test_vhaddq_s32(
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// NYI: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// NYI: ret <4 x i32> [[VHADDQ_V2_I]]
// int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
// return vhaddq_s32(v1, v2);
// }
// LLVM: {{.*}}test_vhadd_s32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[V1]], <2 x i32> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// LLVM: ret <2 x i32> [[VHADD_V2_I]]
}

// NYI-LABEL: @test_vhaddq_u8(
// NYI: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// NYI: ret <16 x i8> [[VHADDQ_V_I]]
// uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
// return vhaddq_u8(v1, v2);
// }
// vhadd_u8: unsigned 8x8-bit halving add; expected to lower to the
// llvm.aarch64.neon.uhadd intrinsic (checked by the CIR and LLVM lines below).
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
return vhadd_u8(v1, v2);

// NOTE(review): the NYI-prefixed blocks inside this function reference other
// tests; they look like stale disabled checks interleaved by a diff — confirm
// their intended placement.
// NYI-LABEL: @test_vhaddq_u16(
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// NYI: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// NYI: ret <8 x i16> [[VHADDQ_V2_I]]
// uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
// return vhaddq_u16(v1, v2);
// }
// CIR-LABEL: vhadd_u8
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>

// NYI-LABEL: @test_vhaddq_u32(
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// NYI: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// NYI: ret <4 x i32> [[VHADDQ_V2_I]]
// uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
// return vhaddq_u32(v1, v2);
// }
// LLVM: {{.*}}test_vhadd_u8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
// LLVM: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
// LLVM: ret <8 x i8> [[VHADD_V_I]]
}

// vhadd_u16: unsigned 4x16-bit halving add; expected to lower to the
// llvm.aarch64.neon.uhadd intrinsic (checked by the CIR and LLVM lines below).
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
return vhadd_u16(v1, v2);

// CIR-LABEL: vhadd_u16
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>

// LLVM: {{.*}}test_vhadd_u16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[V1]], <4 x i16> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// LLVM: ret <4 x i16> [[VHADD_V2_I]]
}

// vhadd_u32: unsigned 2x32-bit halving add; expected to lower to the
// llvm.aarch64.neon.uhadd intrinsic (checked by the CIR and LLVM lines below).
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
return vhadd_u32(v1, v2);

// CIR-LABEL: vhadd_u32
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>

// LLVM: {{.*}}test_vhadd_u32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[V1]], <2 x i32> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// LLVM: ret <2 x i32> [[VHADD_V2_I]]
}

// vhaddq_s8: signed 16x8-bit (128-bit vector) halving add; expected to lower
// to the llvm.aarch64.neon.shadd intrinsic (checked by the lines below).
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
return vhaddq_s8(v1, v2);

// CIR-LABEL: vhaddq_s8
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.shadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vhaddq_s8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
// LLVM: [[VHADD_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
// LLVM: ret <16 x i8> [[VHADD_V_I]]
}

// vhaddq_s16: signed 8x16-bit (128-bit vector) halving add; expected to lower
// to the llvm.aarch64.neon.shadd intrinsic (checked by the lines below).
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
return vhaddq_s16(v1, v2);

// CIR-LABEL: vhaddq_s16
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.shadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>

// LLVM: {{.*}}test_vhaddq_s16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[V1]], <8 x i16> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <8 x i16> [[VHADD_V2_I]] to <16 x i8>
// LLVM: ret <8 x i16> [[VHADD_V2_I]]
}

// vhaddq_s32: signed 4x32-bit (128-bit vector) halving add; expected to lower
// to the llvm.aarch64.neon.shadd intrinsic (checked by the lines below).
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
return vhaddq_s32(v1, v2);

// CIR-LABEL: vhaddq_s32
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.shadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>

// LLVM: {{.*}}test_vhaddq_s32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[V1]], <4 x i32> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <4 x i32> [[VHADD_V2_I]] to <16 x i8>
// LLVM: ret <4 x i32> [[VHADD_V2_I]]
}

// vhaddq_u8: unsigned 16x8-bit (128-bit vector) halving add; expected to lower
// to the llvm.aarch64.neon.uhadd intrinsic (checked by the lines below).
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
return vhaddq_u8(v1, v2);

// CIR-LABEL: vhaddq_u8
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>

// LLVM: {{.*}}test_vhaddq_u8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
// LLVM: [[VHADD_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
// LLVM: ret <16 x i8> [[VHADD_V_I]]
}

// vhaddq_u16: unsigned 8x16-bit (128-bit vector) halving add; expected to
// lower to the llvm.aarch64.neon.uhadd intrinsic (checked by the lines below).
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
return vhaddq_u16(v1, v2);

// CIR-LABEL: vhaddq_u16
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>

// LLVM: {{.*}}test_vhaddq_u16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[V1]], <8 x i16> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <8 x i16> [[VHADD_V2_I]] to <16 x i8>
// LLVM: ret <8 x i16> [[VHADD_V2_I]]
}

// vhaddq_u32: unsigned 4x32-bit (128-bit vector) halving add; expected to
// lower to the llvm.aarch64.neon.uhadd intrinsic (checked by the lines below).
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
return vhaddq_u32(v1, v2);

// CIR-LABEL: vhaddq_u32
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>

// LLVM: {{.*}}test_vhaddq_u32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
// LLVM: [[VHADD_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[V1]], <4 x i32> [[V2]])
// LLVM: [[VHADD_V3_I:%.*]] = bitcast <4 x i32> [[VHADD_V2_I]] to <16 x i8>
// LLVM: ret <4 x i32> [[VHADD_V2_I]]
}

// NYI-LABEL: @test_vhsub_s8(
// NYI: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
Expand Down

0 comments on commit 07deb51

Please sign in to comment.