@@ -2226,12 +2226,19 @@ static mlir::Value vecReduceIntValue(CIRGenFunction &cgf, mlir::Value val,
       loc, val, builder.getConstInt(loc, cgf.SizeTy, 0));
 }
 
-static void prepareNeonCallArgs(CIRGenBuilderTy &builder,
-                                llvm::SmallVector<mlir::Type> argTypes,
-                                llvm::SmallVectorImpl<mlir::Value> &args,
-                                mlir::Location loc,
-                                bool isConstrainedFPIntrinsic, unsigned shift,
-                                bool rightshift) {
+template <typename Operation>
+static mlir::Value emitNeonCallToOp(
+    CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes,
+    llvm::SmallVectorImpl<mlir::Value> &args,
+    std::optional<llvm::StringRef> intrinsicName, mlir::Type funcResTy,
+    mlir::Location loc, bool isConstrainedFPIntrinsic = false,
+    unsigned shift = 0, bool rightshift = false) {
+  // TODO: Consider removing the following unreachable when we have
+  // emitConstrainedFPCall feature implemented
+  assert(!cir::MissingFeatures::emitConstrainedFPCall());
+  if (isConstrainedFPIntrinsic)
+    llvm_unreachable("isConstrainedFPIntrinsic NYI");
+
   for (unsigned j = 0; j < argTypes.size(); ++j) {
     if (isConstrainedFPIntrinsic) {
       assert(!cir::MissingFeatures::emitConstrainedFPCall());
@@ -2244,56 +2251,19 @@ static void prepareNeonCallArgs(CIRGenBuilderTy &builder,
       args[j] = builder.createBitcast(args[j], argTypes[j]);
     }
   }
-}
-
-static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
-                                llvm::SmallVector<mlir::Type> argTypes,
-                                llvm::SmallVectorImpl<mlir::Value> &args,
-                                llvm::StringRef intrinsicName,
-                                mlir::Type funcResTy, mlir::Location loc,
-                                bool isConstrainedFPIntrinsic = false,
-                                unsigned shift = 0, bool rightshift = false) {
-  // TODO: Consider removing the following unreachable when we have
-  // emitConstrainedFPCall feature implemented
-  assert(!cir::MissingFeatures::emitConstrainedFPCall());
-  if (isConstrainedFPIntrinsic)
-    llvm_unreachable("isConstrainedFPIntrinsic NYI");
-
-  prepareNeonCallArgs(builder, std::move(argTypes), args, loc,
-                      isConstrainedFPIntrinsic, shift, rightshift);
 
   if (isConstrainedFPIntrinsic) {
     assert(!cir::MissingFeatures::emitConstrainedFPCall());
     return nullptr;
   }
-  return builder
-      .create<cir::LLVMIntrinsicCallOp>(
-          loc, builder.getStringAttr(intrinsicName), funcResTy, args)
-      .getResult();
-}
-
-// This one is similar to the function above, except for creating an operation
-// from template instead of creating a LLVMIntrinsicCallOp
-template <typename Operation>
-static mlir::Value
-emitNeonCall(CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes,
-             llvm::SmallVectorImpl<mlir::Value> &args, mlir::Type funcResTy,
-             mlir::Location loc, bool isConstrainedFPIntrinsic = false,
-             unsigned shift = 0, bool rightshift = false) {
-  // TODO: Consider removing the following unreachable when we have
-  // emitConstrainedFPCall feature implemented
-  assert(!cir::MissingFeatures::emitConstrainedFPCall());
-  if (isConstrainedFPIntrinsic)
-    llvm_unreachable("isConstrainedFPIntrinsic NYI");
-
-  prepareNeonCallArgs(builder, std::move(argTypes), args, loc,
-                      isConstrainedFPIntrinsic, shift, rightshift);
-
-  if (isConstrainedFPIntrinsic) {
-    assert(!cir::MissingFeatures::emitConstrainedFPCall());
-    return nullptr;
+  if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) {
+    return builder
+        .create<Operation>(loc, builder.getStringAttr(intrinsicName.value()),
+                           funcResTy, args)
+        .getResult();
+  } else {
+    return builder.create<Operation>(loc, funcResTy, args).getResult();
   }
-  return builder.create<Operation>(loc, funcResTy, args).getResult();
 }
 
 /// This function `emitCommonNeonCallPattern0` implements a common way
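
Aside: the two `emitNeonCall` overloads above collapse into the single `emitNeonCallToOp` template, with the `if constexpr` branch choosing between a named LLVM intrinsic call and an op built directly from the template argument. A minimal sketch of the two instantiation modes (not part of the commit; `builder`, `resTy`, `args`, and `loc` stand in for values the real call sites already have):

    // Named intrinsic: the StringRef is required and becomes the
    // LLVMIntrinsicCallOp's intrinsic-name attribute.
    mlir::Value byName = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
        builder, {resTy, resTy}, args, "aarch64.neon.sqadd", resTy, loc);

    // Dedicated CIR op: the name slot is std::nullopt and the else-branch
    // builds the op from the template parameter (see vrndns_f32 below).
    mlir::Value byOp = emitNeonCallToOp<cir::RoundEvenOp>(
        builder, {resTy}, args, std::nullopt, resTy, loc);
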
@@ -2315,7 +2285,7 @@ emitCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName,
     argTypes = {funcResTy, funcResTy};
   }
   mlir::Value res =
-      emitNeonCall(builder, std::move(argTypes), ops, intrincsName, funcResTy,
+      emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, std::move(argTypes), ops, intrincsName, funcResTy,
                    cgf.getLoc(e->getExprLoc()));
   mlir::Type resultType = cgf.convertType(e->getType());
   return builder.createBitcast(res, resultType);
@@ -2337,7 +2307,7 @@ static mlir::Value emitCommonNeonVecAcrossCall(CIRGenFunction &cgf,
   cir::VectorType vTy =
       cir::VectorType::get(&cgf.getMLIRContext(), eltTy, vecLen);
   llvm::SmallVector<mlir::Value, 1> args{op};
-  return emitNeonCall(builder, {vTy}, args, intrincsName, eltTy,
+  return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {vTy}, args, intrincsName, eltTy,
                       cgf.getLoc(e->getExprLoc()));
 }
 
@@ -2420,7 +2390,7 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
   case NEON::BI__builtin_neon_vpaddlq_v: {
     // The source operand type has twice as many elements of half the size.
    cir::VectorType narrowTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy);
-    return emitNeonCall(builder, {narrowTy}, ops,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {narrowTy}, ops,
                         isUnsigned ? "aarch64.neon.uaddlp"
                                    : "aarch64.neon.saddlp",
                         vTy, getLoc(e->getExprLoc()));
@@ -2431,10 +2401,10 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
     cir::VectorType srcVty = builder.getExtendedOrTruncatedElementVectorType(
         vTy, false, /* truncate */
         mlir::cast<cir::IntType>(vTy.getEltType()).isSigned());
-    ops[1] = emitNeonCall(builder, {srcVty, srcVty}, mulOps,
+    ops[1] = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {srcVty, srcVty}, mulOps,
                           "aarch64.neon.sqdmull", vTy, getLoc(e->getExprLoc()));
     ops.resize(2);
-    return emitNeonCall(builder, {vTy, vTy}, ops,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {vTy, vTy}, ops,
                         builtinID == NEON::BI__builtin_neon_vqdmlal_v
                             ? "aarch64.neon.sqadd"
                             : "aarch64.neon.sqsub",
@@ -2472,7 +2442,7 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
     cir::VectorType mulVecT =
         GetNeonType(this, NeonTypeFlags(neonType.getEltType(), false,
                                         /* isQuad */ false));
-    return emitNeonCall(builder, {resTy, mulVecT, SInt32Ty}, ops,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {resTy, mulVecT, SInt32Ty}, ops,
                         (builtinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
                          builtinID == NEON::BI__builtin_neon_vqdmulh_lane_v)
                             ? "aarch64.neon.sqdmulh.lane"
@@ -2484,15 +2454,15 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
     // These intrinsics expect signed vector type as input, but
     // return unsigned vector type.
     cir::VectorType srcTy = getSignChangedVectorType(builder, vTy);
-    return emitNeonCall(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu",
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {srcTy, srcTy}, ops, "aarch64.neon.sqshlu",
                         vTy, getLoc(e->getExprLoc()),
                         false, /* not fp constrained op */
                         1, /* second arg is shift amount */
                         false /* leftshift */);
   }
   case NEON::BI__builtin_neon_vrshr_n_v:
   case NEON::BI__builtin_neon_vrshrq_n_v: {
-    return emitNeonCall(
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
         builder,
         {vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops,
         isUnsigned ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
@@ -2698,25 +2668,25 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
   case NEON::BI__builtin_neon_vaddlvq_s32:
     llvm_unreachable("neon_vaddlvq_s32 NYI");
   case NEON::BI__builtin_neon_vaddlvq_u32:
-    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, ops, "aarch64.neon.uaddlv", resultTy,
                         loc);
   case NEON::BI__builtin_neon_vaddv_f32:
   case NEON::BI__builtin_neon_vaddvq_f32:
   case NEON::BI__builtin_neon_vaddvq_f64:
-    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, ops, "aarch64.neon.faddv", resultTy,
                         loc);
   case NEON::BI__builtin_neon_vaddv_s32:
   case NEON::BI__builtin_neon_vaddvq_s32:
   case NEON::BI__builtin_neon_vaddvq_s64:
-    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, ops, "aarch64.neon.saddv", resultTy,
                         loc);
   case NEON::BI__builtin_neon_vaddv_u32:
   case NEON::BI__builtin_neon_vaddvq_u32:
   case NEON::BI__builtin_neon_vaddvq_u64:
-    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, ops, "aarch64.neon.uaddv", resultTy,
                         loc);
   case NEON::BI__builtin_neon_vcaged_f64: {
-    return emitNeonCall(builder, {argTy}, ops, "aarch64.neon.facge", resultTy,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, ops, "aarch64.neon.facge", resultTy,
                         loc);
   }
   case NEON::BI__builtin_neon_vcages_f32:
@@ -2906,7 +2876,7 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
     cir::VectorType resVecTy =
         cir::VectorType::get(&(cgf.getMLIRContext()), cgf.SInt16Ty, 4);
     vecExtendIntValue(cgf, argVecTy, ops[0], loc);
-    mlir::Value result = emitNeonCall(builder, {argVecTy}, ops,
+    mlir::Value result = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argVecTy}, ops,
                                       "aarch64.neon.sqxtn", resVecTy, loc);
     return vecReduceIntValue(cgf, result, loc);
   }
@@ -2937,7 +2907,7 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
   case NEON::BI__builtin_neon_vqrdmulhh_s16:
     llvm_unreachable("neon_vqrdmulhh_s16 NYI");
   case NEON::BI__builtin_neon_vqrdmulhs_s32:
-    return emitNeonCall(builder, {resultTy, resultTy}, ops,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {resultTy, resultTy}, ops,
                         "aarch64.neon.sqrdmulh", resultTy, loc);
   case NEON::BI__builtin_neon_vqrshlb_s8:
     llvm_unreachable("neon_vqrshlb_s8 NYI");
@@ -3843,7 +3813,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     assert(APSInt && "Expected argument to be a constant");
     Ops[1] = builder.getSInt64(APSInt->getZExtValue(), getLoc(E->getExprLoc()));
     const StringRef Intrinsic = "aarch64.neon.sqshlu";
-    return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {IntType, IntType}, Ops, Intrinsic, IntType,
                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vqshld_n_u64:
@@ -3857,7 +3827,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                      : "aarch64.neon.sqshl";
     Ops.push_back(emitScalarExpr(E->getArg(1)));
     Ops[1] = builder.createIntCast(Ops[1], IntType);
-    return emitNeonCall(builder, {IntType, IntType}, Ops, Intrinsic, IntType,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {IntType, IntType}, Ops, Intrinsic, IntType,
                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vrshrd_n_u64:
@@ -3875,7 +3845,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     assert(APSInt && "Expected argument to be a constant");
     int64_t SV = -APSInt->getSExtValue();
     Ops[1] = builder.getSInt64(SV, getLoc(E->getExprLoc()));
-    return emitNeonCall(builder, {IntType, builder.getSInt64Ty()}, Ops,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {IntType, builder.getSInt64Ty()}, Ops,
                         Intrinsic, IntType, getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vrsrad_n_u64:
@@ -3892,7 +3862,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
 
     llvm::SmallVector<mlir::Value, 2> args = {
         Ops[1], builder.createIntCast(Ops[2], IntType)};
-    Ops[1] = emitNeonCall(builder, {IntType, IntType}, args, Intrinsic, IntType,
+    Ops[1] = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {IntType, IntType}, args, Intrinsic, IntType,
                           getLoc(E->getExprLoc()));
     return builder.createAdd(Ops[0], builder.createBitcast(Ops[1], IntType));
   }
@@ -4040,7 +4010,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     name = "aarch64.neon.pmull";
     cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
         ty, false /* truncated */, !usgn);
-    return emitNeonCall(builder, {argTy, argTy}, Ops, name, ty,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy, argTy}, Ops, name, ty,
                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vmax_v:
@@ -4061,7 +4031,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     llvm::StringRef name = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin";
     if (cir::isFPOrFPVectorTy(ty))
       name = "aarch64.neon.fmin";
-    return emitNeonCall(builder, {ty, ty}, Ops, name, ty,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {ty, ty}, Ops, name, ty,
                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vminh_f16: {
@@ -4072,15 +4042,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     llvm::StringRef name = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
     if (cir::isFPOrFPVectorTy(ty))
       name = "aarch64.neon.fabd";
-    return emitNeonCall(builder, {ty, ty}, Ops, name, ty,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {ty, ty}, Ops, name, ty,
                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vpadal_v:
   case NEON::BI__builtin_neon_vpadalq_v: {
     cir::VectorType argTy = getHalfEltSizeTwiceNumElemsVecType(builder, vTy);
     mlir::Location loc = getLoc(E->getExprLoc());
     llvm::SmallVector<mlir::Value, 1> args = {Ops[1]};
-    mlir::Value tmp = emitNeonCall(
+    mlir::Value tmp = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
         builder, {argTy}, args,
         usgn ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp", vTy, loc);
     mlir::Value addEnd = builder.createBitcast(Ops[0], vTy);
@@ -4114,13 +4084,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   case NEON::BI__builtin_neon_vqrshrun_n_v:
     // The prototype of builtin_neon_vqrshrun_n can be found at
     // https://developer.arm.com/architectures/instruction-sets/intrinsics/
-    return emitNeonCall(
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
         builder,
         {builder.getExtendedOrTruncatedElementVectorType(ty, true, true),
          SInt32Ty},
         Ops, "aarch64.neon.sqrshrun", ty, getLoc(E->getExprLoc()));
   case NEON::BI__builtin_neon_vqshrn_n_v:
-    return emitNeonCall(
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
         builder,
         {builder.getExtendedOrTruncatedElementVectorType(
              vTy, true /* extend */,
@@ -4129,15 +4099,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
         Ops, usgn ? "aarch64.neon.uqshrn" : "aarch64.neon.sqshrn", ty,
         getLoc(E->getExprLoc()));
   case NEON::BI__builtin_neon_vrshrn_n_v:
-    return emitNeonCall(
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
         builder,
         {builder.getExtendedOrTruncatedElementVectorType(
              vTy, true /* extend */,
              mlir::cast<cir::IntType>(vTy.getEltType()).isSigned()),
          SInt32Ty},
         Ops, "aarch64.neon.rshrn", ty, getLoc(E->getExprLoc()));
   case NEON::BI__builtin_neon_vqrshrn_n_v:
-    return emitNeonCall(
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
         builder,
         {builder.getExtendedOrTruncatedElementVectorType(
              vTy, true /* extend */,
@@ -4151,7 +4121,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   case NEON::BI__builtin_neon_vrnda_v:
   case NEON::BI__builtin_neon_vrndaq_v: {
     assert(!cir::MissingFeatures::emitConstrainedFPCall());
-    return emitNeonCall(builder, {ty}, Ops, "round", ty,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {ty}, Ops, "round", ty,
                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vrndih_f16: {
@@ -4174,7 +4144,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   case NEON::BI__builtin_neon_vrndns_f32: {
     mlir::Value arg0 = emitScalarExpr(E->getArg(0));
     args.push_back(arg0);
-    return emitNeonCall<cir::RoundEvenOp>(builder, {arg0.getType()}, args,
+    return emitNeonCallToOp<cir::RoundEvenOp>(builder, {arg0.getType()}, args, std::nullopt,
                                           getCIRGenModule().FloatTy,
                                           getLoc(E->getExprLoc()));
   }
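
For reference, with `Operation = cir::RoundEvenOp` and the intrinsic name passed as `std::nullopt`, the `if constexpr` else-branch from the first hunk reduces this call site to roughly:

    // Arguments are first bitcast to {arg0.getType()} by the shared loop,
    // then the op is built directly from the template parameter.
    mlir::Value res =
        builder.create<cir::RoundEvenOp>(getLoc(E->getExprLoc()),
                                         getCIRGenModule().FloatTy, args)
            .getResult();
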
@@ -4334,7 +4304,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     cir::VectorType vTy = cir::VectorType::get(builder.getContext(), eltTy, 4);
     Ops.push_back(emitScalarExpr(E->getArg(0)));
     // This is to add across the vector elements, so wider result type needed.
-    Ops[0] = emitNeonCall(builder, {vTy}, Ops,
+    Ops[0] = emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {vTy}, Ops,
                           usgn ? "aarch64.neon.uaddv" : "aarch64.neon.saddv",
                           SInt32Ty, getLoc(E->getExprLoc()));
     return builder.createIntCast(Ops[0], eltTy);
@@ -4443,7 +4413,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     mlir::Type argTy = cir::VectorType::get(builder.getContext(),
                                             usgn ? UInt16Ty : SInt16Ty, 8);
     llvm::SmallVector<mlir::Value, 1> argOps = {emitScalarExpr(E->getArg(0))};
-    return emitNeonCall(builder, {argTy}, argOps,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, argOps,
                         usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv",
                         usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc()));
   }
@@ -4457,7 +4427,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     mlir::Type argTy = cir::VectorType::get(builder.getContext(),
                                             usgn ? UInt16Ty : SInt16Ty, 4);
     llvm::SmallVector<mlir::Value, 1> argOps = {emitScalarExpr(E->getArg(0))};
-    return emitNeonCall(builder, {argTy}, argOps,
+    return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {argTy}, argOps,
                         usgn ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv",
                         usgn ? UInt32Ty : SInt32Ty, getLoc(E->getExprLoc()));
   }
@@ -4487,7 +4457,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     cir::VectorType shitAmtVecTy =
         usgn ? getSignChangedVectorType(builder, vTy) : vTy;
     mlir::Value tmp =
-        emitNeonCall(builder, {vTy, shitAmtVecTy}, tmpOps,
+        emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(builder, {vTy, shitAmtVecTy}, tmpOps,
                      usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
                      getLoc(E->getExprLoc()), false,
                      1 /* shift amount is args[1]*/, true /* right shift */);