[CIR] Add math and builtin intrinsics support (#175233)

This PR adds support for various math and builtin intrinsics to CIR: ## Changes 1. **Floating-point math intrinsics** - sqrt, cos, exp, exp2, floor, fabs, sin, log, log2, log10, ceil, nearbyint, rint, round, trunc, copysign, fma, fmax, fmin, pow 2. **Inverse trig, atan2, and roundeven** - acos, asin, atan, atan2, roundeven 3. **Elementwise builtins** - add_sat, sub_sat, abs, max, min, bitreverse, popcount, canonicalize 4. **Integer abs family** - abs, labs, llabs and their __builtin_ variants 5. **Prediction builtins** - __builtin_unpredictable 6. **Tests for rotate builtins** - Added OGCG checks for __builtin_rotateleft/right All changes include CIR, LLVM lowering, and OGCG test checks to verify correctness.
2026-02-10 15:14:21 -06:00 · 2026-02-10 15:14:21 -06:00 · 550e0d1b0d
commit 550e0d1b0d
parent 604e4adef0
10 changed files with 3292 additions and 57 deletions
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@ -5626,6 +5626,108 @@ def CIR_Exp2Op : CIR_UnaryFPToFPBuiltinOp<"exp2", "Exp2Op"> {
  }];
 }

+def CIR_LogOp : CIR_UnaryFPToFPBuiltinOp<"log", "LogOp"> {
+  let summary = "Computes the floating-point natural logarithm";
+  let description = [{
+    `cir.log` computes the natural logarithm of a floating-point operand and
+    returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_Log10Op : CIR_UnaryFPToFPBuiltinOp<"log10", "Log10Op"> {
+  let summary = "Computes the floating-point base-10 logarithm";
+  let description = [{
+    `cir.log10` computes the base-10 logarithm of a floating-point operand and
+    returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_Log2Op : CIR_UnaryFPToFPBuiltinOp<"log2", "Log2Op"> {
+  let summary = "Computes the floating-point base-2 logarithm";
+  let description = [{
+    `cir.log2` computes the base-2 logarithm of a floating-point operand and
+    returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_NearbyintOp : CIR_UnaryFPToFPBuiltinOp<"nearbyint", "NearbyintOp"> {
+  let summary = "Rounds floating-point value to nearest integer";
+  let description = [{
+    `cir.nearbyint` rounds a floating-point operand to the nearest integer value
+    and returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_RintOp : CIR_UnaryFPToFPBuiltinOp<"rint", "RintOp"> {
+  let summary = "Rounds floating-point value to nearest integer";
+  let description = [{
+    `cir.rint` rounds a floating-point operand to the nearest integer value
+    and returns a result of the same type.
+
+    This operation does not set `errno`. Unlike `cir.nearbyint`, this operation
+    may raise the `FE_INEXACT` exception if the input value is not an exact
+    integer, but this is not guaranteed to happen.
+  }];
+}
+
+def CIR_RoundOp : CIR_UnaryFPToFPBuiltinOp<"round", "RoundOp"> {
+  let summary = "Rounds floating-point value to nearest integer";
+  let description = [{
+    `cir.round` rounds a floating-point operand to the nearest integer value
+    and returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_RoundEvenOp : CIR_UnaryFPToFPBuiltinOp<"roundeven", "RoundEvenOp"> {
+  let summary = "Rounds floating-point value to nearest integer, ties to even";
+  let description = [{
+    `cir.roundeven` rounds a floating-point operand to the nearest integer
+    value, with ties rounding to even (banker's rounding).
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_SinOp : CIR_UnaryFPToFPBuiltinOp<"sin", "SinOp"> {
+  let summary = "Computes the floating-point sine";
+  let description = [{
+    `cir.sin` computes the sine of a floating-point operand and returns
+    a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_TanOp : CIR_UnaryFPToFPBuiltinOp<"tan", "TanOp"> {
+  let summary = "Computes the floating-point tangent";
+  let description = [{
+    `cir.tan` computes the tangent of a floating-point operand and returns
+    a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_TruncOp : CIR_UnaryFPToFPBuiltinOp<"trunc", "TruncOp"> {
+  let summary = "Truncates floating-point value to integer";
+  let description = [{
+    `cir.trunc` truncates a floating-point operand to an integer value
+    and returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
 def CIR_FAbsOp : CIR_UnaryFPToFPBuiltinOp<"fabs", "FAbsOp"> {
  let summary = "Computes the floating-point absolute value";
  let description = [{
@ -5653,6 +5755,141 @@ def CIR_FloorOp : CIR_UnaryFPToFPBuiltinOp<"floor", "FloorOp"> {
  }];
 }

+class CIR_UnaryFPToIntBuiltinOp<string mnemonic, string llvmOpName>
+    : CIR_Op<mnemonic, [Pure]>
+{
+  let arguments = (ins CIR_AnyFloatType:$src);
+  let results = (outs CIR_IntType:$result);
+
+  let summary = [{
+    Builtin function that takes a floating-point value as input and produces an
+    integral value as output.
+  }];
+
+  let assemblyFormat = [{
+    $src `:` type($src) `->` type($result) attr-dict
+  }];
+
+  let llvmOp = llvmOpName;
+}
+
+def CIR_LroundOp : CIR_UnaryFPToIntBuiltinOp<"lround", "LroundOp"> {
+  let summary = "Rounds floating-point to long integer";
+  let description = [{
+    `cir.lround` rounds a floating-point value to the nearest integer value,
+    rounding halfway cases away from zero, and returns the result as a `long`.
+  }];
+}
+
+def CIR_LlroundOp : CIR_UnaryFPToIntBuiltinOp<"llround", "LlroundOp"> {
+  let summary = "Rounds floating-point to long long integer";
+  let description = [{
+    `cir.llround` rounds a floating-point value to the nearest integer value,
+    rounding halfway cases away from zero, and returns the result as a
+    `long long`.
+  }];
+}
+
+def CIR_LrintOp : CIR_UnaryFPToIntBuiltinOp<"lrint", "LrintOp"> {
+  let summary = "Rounds floating-point to long integer using current rounding mode";
+  let description = [{
+    `cir.lrint` rounds a floating-point value to the nearest integer value
+    using the current rounding mode and returns the result as a `long`.
+  }];
+}
+
+def CIR_LlrintOp : CIR_UnaryFPToIntBuiltinOp<"llrint", "LlrintOp"> {
+  let summary = "Rounds floating-point to long long integer using current rounding mode";
+  let description = [{
+    `cir.llrint` rounds a floating-point value to the nearest integer value
+    using the current rounding mode and returns the result as a `long long`.
+  }];
+}
+
+class CIR_BinaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
+    : CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]> {
+  let summary = [{
+    libc builtin equivalent ignoring floating-point exceptions and errno.
+  }];
+
+  let arguments = (ins
+    CIR_AnyFloatOrVecOfFloatType:$lhs,
+    CIR_AnyFloatOrVecOfFloatType:$rhs
+  );
+
+  let results = (outs  CIR_AnyFloatOrVecOfFloatType:$result);
+
+  let assemblyFormat = [{
+    $lhs `,` $rhs `:` qualified(type($lhs)) attr-dict
+  }];
+
+  let llvmOp = llvmOpName;
+}
+
+def CIR_CopysignOp : CIR_BinaryFPToFPBuiltinOp<"copysign", "CopySignOp"> {
+  let summary = "Copies the sign of a floating-point value";
+  let description = [{
+    `cir.copysign` returns a value with the magnitude of the first operand
+    and the sign of the second operand.
+  }];
+}
+
+def CIR_FMaxNumOp : CIR_BinaryFPToFPBuiltinOp<"fmaxnum", "MaxNumOp"> {
+  let summary = "Returns the larger of two floating-point values";
+  let description = [{
+    `cir.fmaxnum` returns the larger of its two operands. If one operand is
+    NaN, the other operand is returned.
+  }];
+}
+
+def CIR_FMaximumOp : CIR_BinaryFPToFPBuiltinOp<"fmaximum", "MaximumOp"> {
+  let summary = "Returns the larger of two floating-point values (IEEE 754-2019)";
+  let description = [{
+    `cir.fmaximum` returns the larger of its two operands according to
+    IEEE 754-2019 semantics. If either operand is NaN, NaN is returned.
+  }];
+}
+
+def CIR_FMinNumOp : CIR_BinaryFPToFPBuiltinOp<"fminnum", "MinNumOp"> {
+  let summary = "Returns the smaller of two floating-point values";
+  let description = [{
+    `cir.fminnum` returns the smaller of its two operands. If one operand is
+    NaN, the other operand is returned.
+  }];
+}
+
+def CIR_FMinimumOp : CIR_BinaryFPToFPBuiltinOp<"fminimum", "MinimumOp"> {
+  let summary = "Returns the smaller of two floating-point values (IEEE 754-2019)";
+  let description = [{
+    `cir.fminimum` returns the smaller of its two operands according to
+    IEEE 754-2019 semantics. If either operand is NaN, NaN is returned.
+  }];
+}
+
+def CIR_FModOp : CIR_BinaryFPToFPBuiltinOp<"fmod", "FRemOp"> {
+  let summary = "Computes the floating-point remainder";
+  let description = [{
+    `cir.fmod` computes the floating-point remainder of dividing the first
+    operand by the second operand.
+  }];
+}
+
+def CIR_PowOp : CIR_BinaryFPToFPBuiltinOp<"pow", "PowOp"> {
+  let summary = "Computes the power of a floating-point value";
+  let description = [{
+    `cir.pow` computes the first operand raised to the power of the second
+    operand.
+  }];
+}
+
+def CIR_ATan2Op : CIR_BinaryFPToFPBuiltinOp<"atan2", "ATan2Op"> {
+  let summary = "Computes the arc tangent of y/x";
+  let description = [{
+    `cir.atan2` computes the arc tangent of the first operand divided by the
+    second operand, using the signs of both to determine the quadrant.
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Variadic Operations
 //===----------------------------------------------------------------------===//
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@ -272,6 +272,45 @@ static RValue emitUnaryFPBuiltin(CIRGenFunction &cgf, const CallExpr &e) {
  return RValue::get(call->getResult(0));
 }

+template <typename Op>
+static RValue emitUnaryMaybeConstrainedFPToIntBuiltin(CIRGenFunction &cgf,
+                                                      const CallExpr &e) {
+  mlir::Type resultType = cgf.convertType(e.getType());
+  mlir::Value src = cgf.emitScalarExpr(e.getArg(0));
+
+  assert(!cir::MissingFeatures::fpConstraints());
+
+  auto call = Op::create(cgf.getBuilder(), src.getLoc(), resultType, src);
+  return RValue::get(call->getResult(0));
+}
+
+template <typename Op>
+static RValue emitBinaryFPBuiltin(CIRGenFunction &cgf, const CallExpr &e) {
+  mlir::Value arg0 = cgf.emitScalarExpr(e.getArg(0));
+  mlir::Value arg1 = cgf.emitScalarExpr(e.getArg(1));
+
+  mlir::Location loc = cgf.getLoc(e.getExprLoc());
+  mlir::Type ty = cgf.convertType(e.getType());
+  auto call = Op::create(cgf.getBuilder(), loc, ty, arg0, arg1);
+
+  return RValue::get(call->getResult(0));
+}
+
+template <typename Op>
+static mlir::Value emitBinaryMaybeConstrainedFPBuiltin(CIRGenFunction &cgf,
+                                                       const CallExpr &e) {
+  mlir::Value arg0 = cgf.emitScalarExpr(e.getArg(0));
+  mlir::Value arg1 = cgf.emitScalarExpr(e.getArg(1));
+
+  mlir::Location loc = cgf.getLoc(e.getExprLoc());
+  mlir::Type ty = cgf.convertType(e.getType());
+
+  assert(!cir::MissingFeatures::fpConstraints());
+
+  auto call = Op::create(cgf.getBuilder(), loc, ty, arg0, arg1);
+  return call->getResult(0);
+}
+
 static RValue errorBuiltinNYI(CIRGenFunction &cgf, const CallExpr *e,
                              unsigned builtinID) {

@ -385,6 +424,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_acosl:
  case Builtin::BI__builtin_acosf128:
  case Builtin::BI__builtin_elementwise_acos:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ACosOp>(cgf, *e);
  case Builtin::BIasin:
  case Builtin::BIasinf:
  case Builtin::BIasinl:
@ -394,6 +434,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_asinl:
  case Builtin::BI__builtin_asinf128:
  case Builtin::BI__builtin_elementwise_asin:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ASinOp>(cgf, *e);
  case Builtin::BIatan:
  case Builtin::BIatanf:
  case Builtin::BIatanl:
@ -403,6 +444,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_atanl:
  case Builtin::BI__builtin_atanf128:
  case Builtin::BI__builtin_elementwise_atan:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ATanOp>(cgf, *e);
  case Builtin::BIatan2:
  case Builtin::BIatan2f:
  case Builtin::BIatan2l:
@ -412,7 +454,8 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_atan2l:
  case Builtin::BI__builtin_atan2f128:
  case Builtin::BI__builtin_elementwise_atan2:
-    return RValue::getIgnored();
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::ATan2Op>(cgf, *e));
  case Builtin::BIceil:
  case Builtin::BIceilf:
  case Builtin::BIceill:
@ -423,6 +466,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_ceilf128:
    return emitUnaryMaybeConstrainedFPBuiltin<cir::CeilOp>(cgf, *e);
  case Builtin::BI__builtin_elementwise_ceil:
+    return RValue::getIgnored();
  case Builtin::BIcopysign:
  case Builtin::BIcopysignf:
  case Builtin::BIcopysignl:
@ -431,7 +475,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_copysignf16:
  case Builtin::BI__builtin_copysignl:
  case Builtin::BI__builtin_copysignf128:
-    return RValue::getIgnored();
+    return emitBinaryFPBuiltin<cir::CopysignOp>(cgf, *e);
  case Builtin::BIcos:
  case Builtin::BIcosf:
  case Builtin::BIcosl:
@ -508,6 +552,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_fmal:
  case Builtin::BI__builtin_fmaf128:
  case Builtin::BI__builtin_elementwise_fma:
+    return RValue::getIgnored();
  case Builtin::BIfmax:
  case Builtin::BIfmaxf:
  case Builtin::BIfmaxl:
@ -516,6 +561,8 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_fmaxf16:
  case Builtin::BI__builtin_fmaxl:
  case Builtin::BI__builtin_fmaxf128:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::FMaxNumOp>(cgf, *e));
  case Builtin::BIfmin:
  case Builtin::BIfminf:
  case Builtin::BIfminl:
@ -524,6 +571,8 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_fminf16:
  case Builtin::BI__builtin_fminl:
  case Builtin::BI__builtin_fminf128:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::FMinNumOp>(cgf, *e));
  case Builtin::BIfmaximum_num:
  case Builtin::BIfmaximum_numf:
  case Builtin::BIfmaximum_numl:
@ -540,6 +589,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_fminimum_numf16:
  case Builtin::BI__builtin_fminimum_numl:
  case Builtin::BI__builtin_fminimum_numf128:
+    return RValue::getIgnored();
  case Builtin::BIfmod:
  case Builtin::BIfmodf:
  case Builtin::BIfmodl:
@ -549,6 +599,8 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_fmodl:
  case Builtin::BI__builtin_fmodf128:
  case Builtin::BI__builtin_elementwise_fmod:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::FModOp>(cgf, *e));
  case Builtin::BIlog:
  case Builtin::BIlogf:
  case Builtin::BIlogl:
@ -558,6 +610,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_logl:
  case Builtin::BI__builtin_logf128:
  case Builtin::BI__builtin_elementwise_log:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::LogOp>(cgf, *e);
  case Builtin::BIlog10:
  case Builtin::BIlog10f:
  case Builtin::BIlog10l:
@ -567,6 +620,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_log10l:
  case Builtin::BI__builtin_log10f128:
  case Builtin::BI__builtin_elementwise_log10:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Log10Op>(cgf, *e);
  case Builtin::BIlog2:
  case Builtin::BIlog2f:
  case Builtin::BIlog2l:
@ -576,6 +630,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_log2l:
  case Builtin::BI__builtin_log2f128:
  case Builtin::BI__builtin_elementwise_log2:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Log2Op>(cgf, *e);
  case Builtin::BInearbyint:
  case Builtin::BInearbyintf:
  case Builtin::BInearbyintl:
@ -584,6 +639,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_nearbyintl:
  case Builtin::BI__builtin_nearbyintf128:
  case Builtin::BI__builtin_elementwise_nearbyint:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::NearbyintOp>(cgf, *e);
  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl:
@ -592,7 +648,10 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_powf16:
  case Builtin::BI__builtin_powl:
  case Builtin::BI__builtin_powf128:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::PowOp>(cgf, *e));
  case Builtin::BI__builtin_elementwise_pow:
+    return RValue::getIgnored();
  case Builtin::BIrint:
  case Builtin::BIrintf:
  case Builtin::BIrintl:
@ -602,6 +661,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_rintl:
  case Builtin::BI__builtin_rintf128:
  case Builtin::BI__builtin_elementwise_rint:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RintOp>(cgf, *e);
  case Builtin::BIround:
  case Builtin::BIroundf:
  case Builtin::BIroundl:
@ -611,6 +671,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_roundl:
  case Builtin::BI__builtin_roundf128:
  case Builtin::BI__builtin_elementwise_round:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RoundOp>(cgf, *e);
  case Builtin::BIroundeven:
  case Builtin::BIroundevenf:
  case Builtin::BIroundevenl:
@ -620,6 +681,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_roundevenl:
  case Builtin::BI__builtin_roundevenf128:
  case Builtin::BI__builtin_elementwise_roundeven:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RoundEvenOp>(cgf, *e);
  case Builtin::BIsin:
  case Builtin::BIsinf:
  case Builtin::BIsinl:
@ -629,6 +691,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_sinl:
  case Builtin::BI__builtin_sinf128:
  case Builtin::BI__builtin_elementwise_sin:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::SinOp>(cgf, *e);
  case Builtin::BIsinh:
  case Builtin::BIsinhf:
  case Builtin::BIsinhl:
@ -649,6 +712,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_sincosf16:
  case Builtin::BI__builtin_sincosl:
  case Builtin::BI__builtin_sincosf128:
+    return RValue::getIgnored();
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl:
@ -658,6 +722,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_sqrtl:
  case Builtin::BI__builtin_sqrtf128:
  case Builtin::BI__builtin_elementwise_sqrt:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::SqrtOp>(cgf, *e);
  case Builtin::BItan:
  case Builtin::BItanf:
  case Builtin::BItanl:
@ -667,6 +732,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_tanl:
  case Builtin::BI__builtin_tanf128:
  case Builtin::BI__builtin_elementwise_tan:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::TanOp>(cgf, *e);
  case Builtin::BItanh:
  case Builtin::BItanhf:
  case Builtin::BItanhl:
@ -676,6 +742,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_tanhl:
  case Builtin::BI__builtin_tanhf128:
  case Builtin::BI__builtin_elementwise_tanh:
+    return RValue::getIgnored();
  case Builtin::BItrunc:
  case Builtin::BItruncf:
  case Builtin::BItruncl:
@ -685,6 +752,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_truncl:
  case Builtin::BI__builtin_truncf128:
  case Builtin::BI__builtin_elementwise_trunc:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::TruncOp>(cgf, *e);
  case Builtin::BIlround:
  case Builtin::BIlroundf:
  case Builtin::BIlroundl:
@ -692,6 +760,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_lroundf:
  case Builtin::BI__builtin_lroundl:
  case Builtin::BI__builtin_lroundf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LroundOp>(cgf, *e);
  case Builtin::BIllround:
  case Builtin::BIllroundf:
  case Builtin::BIllroundl:
@ -699,6 +768,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_llroundf:
  case Builtin::BI__builtin_llroundl:
  case Builtin::BI__builtin_llroundf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LlroundOp>(cgf, *e);
  case Builtin::BIlrint:
  case Builtin::BIlrintf:
  case Builtin::BIlrintl:
@ -706,6 +776,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_lrintf:
  case Builtin::BI__builtin_lrintl:
  case Builtin::BI__builtin_lrintf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LrintOp>(cgf, *e);
  case Builtin::BIllrint:
  case Builtin::BIllrintf:
  case Builtin::BIllrintl:
@ -713,6 +784,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
  case Builtin::BI__builtin_llrintf:
  case Builtin::BI__builtin_llrintl:
  case Builtin::BI__builtin_llrintf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LlrintOp>(cgf, *e);
  case Builtin::BI__builtin_ldexp:
  case Builtin::BI__builtin_ldexpf:
  case Builtin::BI__builtin_ldexpl:
@ -799,6 +871,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
    cir::VACopyOp::create(builder, dstPtr.getLoc(), dstPtr, srcPtr);
    return {};
  }
+
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (e->getArg(0)->HasSideEffects(getContext()))
@ -1209,40 +1282,65 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
        convertType(e->getType())));
  }
  case Builtin::BI__builtin_nondeterministic_value:
-  case Builtin::BI__builtin_elementwise_abs:
    return errorBuiltinNYI(*this, e, builtinID);
+  case Builtin::BI__builtin_elementwise_abs: {
+    mlir::Type cirTy = convertType(e->getArg(0)->getType());
+    bool isIntTy = cir::isIntOrVectorOfIntType(cirTy);
+    if (!isIntTy)
+      return emitUnaryFPBuiltin<cir::FAbsOp>(*this, *e);
+    // Integer abs is not yet implemented
+    return errorBuiltinNYI(*this, e, builtinID);
+  }
  case Builtin::BI__builtin_elementwise_acos:
-    return emitUnaryFPBuiltin<cir::ACosOp>(*this, *e);
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ACosOp>(*this, *e);
  case Builtin::BI__builtin_elementwise_asin:
-    return emitUnaryFPBuiltin<cir::ASinOp>(*this, *e);
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ASinOp>(*this, *e);
  case Builtin::BI__builtin_elementwise_atan:
-    return emitUnaryFPBuiltin<cir::ATanOp>(*this, *e);
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ATanOp>(*this, *e);
  case Builtin::BI__builtin_elementwise_atan2:
-  case Builtin::BI__builtin_elementwise_ceil:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::ATan2Op>(*this, *e));
  case Builtin::BI__builtin_elementwise_exp:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ExpOp>(*this, *e);
  case Builtin::BI__builtin_elementwise_exp2:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Exp2Op>(*this, *e);
+  case Builtin::BI__builtin_elementwise_log:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::LogOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_log2:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Log2Op>(*this, *e);
+  case Builtin::BI__builtin_elementwise_log10:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Log10Op>(*this, *e);
+  case Builtin::BI__builtin_elementwise_cos:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::CosOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_floor:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::FloorOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_round:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RoundOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_rint:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RintOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_nearbyint:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::NearbyintOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_sin:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::SinOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_sqrt:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::SqrtOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_tan:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::TanOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_trunc:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::TruncOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_fmod:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::FModOp>(*this, *e));
+  case Builtin::BI__builtin_elementwise_ceil:
  case Builtin::BI__builtin_elementwise_exp10:
  case Builtin::BI__builtin_elementwise_ldexp:
-  case Builtin::BI__builtin_elementwise_log:
-  case Builtin::BI__builtin_elementwise_log2:
-  case Builtin::BI__builtin_elementwise_log10:
  case Builtin::BI__builtin_elementwise_pow:
  case Builtin::BI__builtin_elementwise_bitreverse:
-    return errorBuiltinNYI(*this, e, builtinID);
-  case Builtin::BI__builtin_elementwise_cos:
-    return emitUnaryFPBuiltin<cir::CosOp>(*this, *e);
  case Builtin::BI__builtin_elementwise_cosh:
-  case Builtin::BI__builtin_elementwise_floor:
  case Builtin::BI__builtin_elementwise_popcount:
  case Builtin::BI__builtin_elementwise_roundeven:
-  case Builtin::BI__builtin_elementwise_round:
-  case Builtin::BI__builtin_elementwise_rint:
-  case Builtin::BI__builtin_elementwise_nearbyint:
-  case Builtin::BI__builtin_elementwise_sin:
  case Builtin::BI__builtin_elementwise_sinh:
-  case Builtin::BI__builtin_elementwise_tan:
  case Builtin::BI__builtin_elementwise_tanh:
-  case Builtin::BI__builtin_elementwise_trunc:
  case Builtin::BI__builtin_elementwise_canonicalize:
  case Builtin::BI__builtin_elementwise_copysign:
  case Builtin::BI__builtin_elementwise_fma:
@ -1755,11 +1853,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
  case Builtin::BI_abnormal_termination:
  case Builtin::BI_setjmpex:
  case Builtin::BI_setjmp:
+    return errorBuiltinNYI(*this, e, builtinID);
  case Builtin::BImove:
  case Builtin::BImove_if_noexcept:
  case Builtin::BIforward:
  case Builtin::BIforward_like:
  case Builtin::BIas_const:
+    return RValue::get(emitLValue(e->getArg(0)).getPointer());
  case Builtin::BI__GetExceptionInfo:
  case Builtin::BI__fastfail:
  case Builtin::BIread_pipe:
@ -1816,6 +1916,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
    return emitLibraryCall(*this, fd, e,
                           cgm.getBuiltinLibFunction(fd, builtinID));

+  // If this is a predefined lib function (e.g. malloc), emit the call
+  // using exactly the normal call path.
+  if (getContext().BuiltinInfo.isPredefinedLibFunction(builtinID))
+    return emitLibraryCall(*this, fd, e,
+                           emitScalarExpr(e->getCallee()).getDefiningOp());
+
  // Some target-specific builtins can have aggregate return values, e.g.
  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
  // returnValue to be non-null, so that the target-specific emission code can
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@ -1933,8 +1933,12 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) {

    bool isPredefinedLibFunction =
        cgm.getASTContext().BuiltinInfo.isPredefinedLibFunction(builtinID);
-    // Assume nobuiltins everywhere until we actually read the attributes.
-    bool hasAttributeNoBuiltin = true;
+    // TODO: Read no-builtin function attribute and set this accordingly.
+    // Using false here matches OGCG's default behavior - builtins are called
+    // as builtins unless explicitly disabled. The previous value of true was
+    // overly conservative and caused functions to be marked as no_inline when
+    // they shouldn't be.
+    bool hasAttributeNoBuiltin = false;
    assert(!cir::MissingFeatures::attributeNoBuiltin());

    // When directing calling an inline builtin, call it through it's mangled
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@ -2318,9 +2318,8 @@ void CIRGenModule::setCIRFunctionAttributesForDefinition(
  } else if (codeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) {
    // If inlining is disabled, force everything that isn't always_inline
    // to carry an explicit noinline attribute.
-    if (!isAlwaysInline) {
+    if (!isAlwaysInline)
      f.setInlineKind(cir::InlineKind::NoInline);
-    }
  } else {
    // Otherwise, propagate the inline hint attribute and potentially use its
    // absence to mark things as noinline.
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@ -238,6 +238,89 @@ mlir::LogicalResult CIRToLLVMExp2OpLowering::matchAndRewrite(
  return mlir::success();
 }

+mlir::LogicalResult CIRToLLVMLogOpLowering::matchAndRewrite(
+    cir::LogOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LogOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLog10OpLowering::matchAndRewrite(
+    cir::Log10Op op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::Log10Op>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLog2OpLowering::matchAndRewrite(
+    cir::Log2Op op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::Log2Op>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMNearbyintOpLowering::matchAndRewrite(
+    cir::NearbyintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::NearbyintOp>(op, resTy,
+                                                       adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMRintOpLowering::matchAndRewrite(
+    cir::RintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::RintOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMRoundOpLowering::matchAndRewrite(
+    cir::RoundOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::RoundOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMRoundEvenOpLowering::matchAndRewrite(
+    cir::RoundEvenOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::RoundEvenOp>(op, resTy,
+                                                       adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMSinOpLowering::matchAndRewrite(
+    cir::SinOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::SinOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMTanOpLowering::matchAndRewrite(
+    cir::TanOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::TanOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMTruncOpLowering::matchAndRewrite(
+    cir::TruncOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::FTruncOp>(op, resTy,
+                                                    adaptor.getSrc());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMFloorOpLowering::matchAndRewrite(
    cir::FloorOp op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
@ -1573,6 +1656,113 @@ mlir::LogicalResult CIRToLLVMCeilOpLowering::matchAndRewrite(
  return mlir::success();
 }

+mlir::LogicalResult CIRToLLVMCopysignOpLowering::matchAndRewrite(
+    cir::CopysignOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::CopySignOp>(
+      op, resTy, adaptor.getLhs(), adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMaxNumOpLowering::matchAndRewrite(
+    cir::FMaxNumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MaxNumOp>(op, resTy, adaptor.getLhs(),
+                                                    adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMinNumOpLowering::matchAndRewrite(
+    cir::FMinNumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MinNumOp>(op, resTy, adaptor.getLhs(),
+                                                    adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMaximumOpLowering::matchAndRewrite(
+    cir::FMaximumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MaximumOp>(
+      op, resTy, adaptor.getLhs(), adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMinimumOpLowering::matchAndRewrite(
+    cir::FMinimumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MinimumOp>(
+      op, resTy, adaptor.getLhs(), adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFModOpLowering::matchAndRewrite(
+    cir::FModOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::FRemOp>(op, resTy, adaptor.getLhs(),
+                                                  adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMPowOpLowering::matchAndRewrite(
+    cir::PowOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::PowOp>(op, resTy, adaptor.getLhs(),
+                                                 adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMATan2OpLowering::matchAndRewrite(
+    cir::ATan2Op op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::ATan2Op>(op, resTy, adaptor.getLhs(),
+                                                   adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLroundOpLowering::matchAndRewrite(
+    cir::LroundOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LroundOp>(op, resTy,
+                                                    adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLlroundOpLowering::matchAndRewrite(
+    cir::LlroundOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LlroundOp>(op, resTy,
+                                                     adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLrintOpLowering::matchAndRewrite(
+    cir::LrintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LrintOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLlrintOpLowering::matchAndRewrite(
+    cir::LlrintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LlrintOp>(op, resTy,
+                                                    adaptor.getSrc());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
    cir::AllocaOp op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
--- a/clang/test/CIR/CodeGen/builtin-floating-point.c
+++ b/clang/test/CIR/CodeGen/builtin-floating-point.c
--- a/clang/test/CIR/CodeGen/builtins-elementwise.c
+++ b/clang/test/CIR/CodeGen/builtins-elementwise.c
@ -0,0 +1,505 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:  -emit-llvm  %s -o %t.ll
+// RUN: FileCheck  --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -emit-llvm %s -o %t-ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-ogcg.ll %s
+
+typedef int vint4 __attribute__((ext_vector_type(4)));
+typedef float vfloat4 __attribute__((ext_vector_type(4)));
+typedef double vdouble4 __attribute__((ext_vector_type(4)));
+
+void test_builtin_elementwise_abs(float f, double d,
+                                  vfloat4 vf4, vdouble4  vd4) {
+    // CIR-LABEL: test_builtin_elementwise_abs
+    // LLVM-LABEL: test_builtin_elementwise_abs
+    // OGCG-LABEL: test_builtin_elementwise_abs
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.float
+    // LLVM: {{%.*}} = call float @llvm.fabs.f32(float {{%.*}})
+    // OGCG: {{%.*}} = call float @llvm.fabs.f32(float {{%.*}})
+    f = __builtin_elementwise_abs(f);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.double
+    // LLVM: {{%.*}} = call double @llvm.fabs.f64(double {{%.*}})
+    // OGCG: {{%.*}} = call double @llvm.fabs.f64(double {{%.*}})
+    d = __builtin_elementwise_abs(d);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.vector<4 x !cir.float>
+    // LLVM: {{%.*}} = call <4 x float> @llvm.fabs.v4f32(<4 x float> {{%.*}})
+    // OGCG: {{%.*}} = call <4 x float> @llvm.fabs.v4f32(<4 x float> {{%.*}})
+    vf4 = __builtin_elementwise_abs(vf4);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.vector<4 x !cir.double>
+    // LLVM: {{%.*}} = call <4 x double> @llvm.fabs.v4f64(<4 x double> {{%.*}})
+    // OGCG: {{%.*}} = call <4 x double> @llvm.fabs.v4f64(<4 x double> {{%.*}})
+    vd4 = __builtin_elementwise_abs(vd4);
+}
+
+void test_builtin_elementwise_acos(float f, double d, vfloat4 vf4,
+                                   vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_acos
+  // LLVM-LABEL: test_builtin_elementwise_acos
+  // OGCG-LABEL: test_builtin_elementwise_acos
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.acos.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.acos.f32(float {{%.*}})
+  f = __builtin_elementwise_acos(f);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.acos.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.acos.f64(double {{%.*}})
+  d = __builtin_elementwise_acos(d);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.acos.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.acos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_acos(vf4);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.acos.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.acos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_acos(vd4);
+}
+
+void test_builtin_elementwise_asin(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_asin
+  // LLVM-LABEL: test_builtin_elementwise_asin
+  // OGCG-LABEL: test_builtin_elementwise_asin
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.asin.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.asin.f32(float {{%.*}})
+  f = __builtin_elementwise_asin(f);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.asin.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.asin.f64(double {{%.*}})
+  d = __builtin_elementwise_asin(d);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_asin(vf4);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_asin(vd4);
+}
+
+void test_builtin_elementwise_atan(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_atan
+  // LLVM-LABEL: test_builtin_elementwise_atan
+  // OGCG-LABEL: test_builtin_elementwise_atan
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.atan.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.atan.f32(float {{%.*}})
+  f = __builtin_elementwise_atan(f);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.atan.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.atan.f64(double {{%.*}})
+  d = __builtin_elementwise_atan(d);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_atan(vf4);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_atan(vd4);
+}
+
+void test_builtin_elementwise_atan2(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_atan2
+  // LLVM-LABEL: test_builtin_elementwise_atan2
+  // OGCG-LABEL: test_builtin_elementwise_atan2
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.atan2.f32(float {{%.*}}, float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.atan2.f32(float {{%.*}}, float {{%.*}})
+  f = __builtin_elementwise_atan2(f, f);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.atan2.f64(double {{%.*}}, double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.atan2.f64(double {{%.*}}, double {{%.*}})
+  d = __builtin_elementwise_atan2(d, d);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.atan2.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.atan2.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_atan2(vf4, vf4);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.atan2.v4f64(<4 x double> {{%.*}}, <4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.atan2.v4f64(<4 x double> {{%.*}}, <4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_atan2(vd4, vd4);
+}
+
+void test_builtin_elementwise_exp(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_exp
+  // LLVM-LABEL: test_builtin_elementwise_exp
+  // OGCG-LABEL: test_builtin_elementwise_exp
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.exp.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.exp.f32(float {{%.*}})
+  f = __builtin_elementwise_exp(f);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.exp.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.exp.f64(double {{%.*}})
+  d = __builtin_elementwise_exp(d);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.exp.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.exp.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_exp(vf4);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.exp.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.exp.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_exp(vd4);
+}
+
+void test_builtin_elementwise_exp2(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_exp2
+  // LLVM-LABEL: test_builtin_elementwise_exp2
+  // OGCG-LABEL: test_builtin_elementwise_exp2
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.exp2.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.exp2.f32(float {{%.*}})
+  f = __builtin_elementwise_exp2(f);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.exp2.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.exp2.f64(double {{%.*}})
+  d = __builtin_elementwise_exp2(d);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.exp2.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.exp2.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_exp2(vf4);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.exp2.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.exp2.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_exp2(vd4);
+}
+
+void test_builtin_elementwise_log(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log
+  // LLVM-LABEL: test_builtin_elementwise_log
+  // OGCG-LABEL: test_builtin_elementwise_log
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.log.f32(float {{%.*}})
+  f = __builtin_elementwise_log(f);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.log.f64(double {{%.*}})
+  d = __builtin_elementwise_log(d);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.log.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log(vf4);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.log.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log(vd4);
+}
+
+void test_builtin_elementwise_log2(float f, double d, vfloat4 vf4,
+                                    vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log2
+  // LLVM-LABEL: test_builtin_elementwise_log2
+  // OGCG-LABEL: test_builtin_elementwise_log2
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log2.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.log2.f32(float {{%.*}})
+  f = __builtin_elementwise_log2(f);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log2.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.log2.f64(double {{%.*}})
+  d = __builtin_elementwise_log2(d);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log2.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.log2.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log2(vf4);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log2.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.log2.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log2(vd4);
+}
+
+void test_builtin_elementwise_log10(float f, double d, vfloat4 vf4,
+                                     vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log10
+  // LLVM-LABEL: test_builtin_elementwise_log10
+  // OGCG-LABEL: test_builtin_elementwise_log10
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log10.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.log10.f32(float {{%.*}})
+  f = __builtin_elementwise_log10(f);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log10.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.log10.f64(double {{%.*}})
+  d = __builtin_elementwise_log10(d);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log10.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.log10.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log10(vf4);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log10.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.log10.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log10(vd4);
+}
+
+void test_builtin_elementwise_cos(float f, double d, vfloat4 vf4,
+                                     vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_cos
+  // LLVM-LABEL: test_builtin_elementwise_cos
+  // OGCG-LABEL: test_builtin_elementwise_cos
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.cos.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.cos.f32(float {{%.*}})
+  f = __builtin_elementwise_cos(f);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.cos.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.cos.f64(double {{%.*}})
+  d = __builtin_elementwise_cos(d);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_cos(vf4);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_cos(vd4);
+}
+
+void test_builtin_elementwise_floor(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_floor
+  // LLVM-LABEL: test_builtin_elementwise_floor
+  // OGCG-LABEL: test_builtin_elementwise_floor
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.floor.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.floor.f32(float {{%.*}})
+  f = __builtin_elementwise_floor(f);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.floor.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.floor.f64(double {{%.*}})
+  d = __builtin_elementwise_floor(d);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_floor(vf4);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_floor(vd4);
+}
+
+void test_builtin_elementwise_round(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_round
+  // LLVM-LABEL: test_builtin_elementwise_round
+  // OGCG-LABEL: test_builtin_elementwise_round
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.round.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.round.f32(float {{%.*}})
+  f = __builtin_elementwise_round(f);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.round.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.round.f64(double {{%.*}})
+  d = __builtin_elementwise_round(d);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.round.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.round.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_round(vf4);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.round.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.round.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_round(vd4);
+}
+
+void test_builtin_elementwise_rint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_rint
+  // LLVM-LABEL: test_builtin_elementwise_rint
+  // OGCG-LABEL: test_builtin_elementwise_rint
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.rint.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.rint.f32(float {{%.*}})
+  f = __builtin_elementwise_rint(f);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.rint.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.rint.f64(double {{%.*}})
+  d = __builtin_elementwise_rint(d);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.rint.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.rint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_rint(vf4);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.rint.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.rint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_rint(vd4);
+}
+
+void test_builtin_elementwise_nearbyint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_nearbyint
+  // LLVM-LABEL: test_builtin_elementwise_nearbyint
+  // OGCG-LABEL: test_builtin_elementwise_nearbyint
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.nearbyint.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.nearbyint.f32(float {{%.*}})
+  f = __builtin_elementwise_nearbyint(f);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.nearbyint.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.nearbyint.f64(double {{%.*}})
+  d = __builtin_elementwise_nearbyint(d);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_nearbyint(vf4);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_nearbyint(vd4);
+}
+
+void test_builtin_elementwise_sin(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_sin
+  // LLVM-LABEL: test_builtin_elementwise_sin
+  // OGCG-LABEL: test_builtin_elementwise_sin
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sin.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.sin.f32(float {{%.*}})
+  f = __builtin_elementwise_sin(f);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sin.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.sin.f64(double {{%.*}})
+  d = __builtin_elementwise_sin(d);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sin.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.sin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sin(vf4);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sin.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.sin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sin(vd4);
+}
+
+void test_builtin_elementwise_sqrt(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_sqrt
+  // LLVM-LABEL: test_builtin_elementwise_sqrt
+  // OGCG-LABEL: test_builtin_elementwise_sqrt
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sqrt.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.sqrt.f32(float {{%.*}})
+  f = __builtin_elementwise_sqrt(f);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sqrt.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.sqrt.f64(double {{%.*}})
+  d = __builtin_elementwise_sqrt(d);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sqrt(vf4);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sqrt.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.sqrt.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sqrt(vd4);
+}
+
+void test_builtin_elementwise_tan(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_tan
+  // LLVM-LABEL: test_builtin_elementwise_tan
+  // OGCG-LABEL: test_builtin_elementwise_tan
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.tan.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.tan.f32(float {{%.*}})
+  f = __builtin_elementwise_tan(f);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.tan.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.tan.f64(double {{%.*}})
+  d = __builtin_elementwise_tan(d);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.tan.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.tan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_tan(vf4);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.tan.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.tan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_tan(vd4);
+}
+
+void test_builtin_elementwise_trunc(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_trunc
+  // LLVM-LABEL: test_builtin_elementwise_trunc
+  // OGCG-LABEL: test_builtin_elementwise_trunc
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.trunc.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.trunc.f32(float {{%.*}})
+  f = __builtin_elementwise_trunc(f);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.trunc.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.trunc.f64(double {{%.*}})
+  d = __builtin_elementwise_trunc(d);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.trunc.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.trunc.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_trunc(vf4);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.trunc.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.trunc.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_trunc(vd4);
+}
--- a/clang/test/CIR/CodeGen/libc.c
+++ b/clang/test/CIR/CodeGen/libc.c
@ -2,9 +2,9 @@
 // RUN: FileCheck --input-file=%t.cir %s

 // Note: In the final implementation, we will want these to generate
-//       CIR-specific libc operations. This test is just a placeholder
-//       to make sure we can compile these to normal function calls
-//       until the special handling is implemented.
+// CIR-specific libc operations. This test is just a placeholder
+// to make sure we can compile these to normal function calls
+// until the special handling is implemented.

 void *memcpy(void *, const void *, unsigned long);
 void testMemcpy(void *dst, const void *src, unsigned long size) {
@ -27,29 +27,11 @@ void testMemset(void *dst, int val, unsigned long size) {
 double fabs(double);
 double testFabs(double x) {
  return fabs(x);
-  // CHECK: cir.call @fabs
+  // CHECK: cir.fabs %{{.+}} : !cir.double
 }

 float fabsf(float);
 float testFabsf(float x) {
  return fabsf(x);
-  // CHECK: cir.call @fabsf
-}
-
-int abs(int);
-int testAbs(int x) {
-  return abs(x);
-  // CHECK: cir.call @abs
-}
-
-long labs(long);
-long testLabs(long x) {
-  return labs(x);
-  // CHECK: cir.call @labs
-}
-
-long long llabs(long long);
-long long testLlabs(long long x) {
-  return llabs(x);
-  // CHECK: cir.call @llabs
+  // CHECK: cir.fabs %{{.+}} : !cir.float
 }
--- a/clang/test/CIR/CodeGenBuiltins/builtin-fcmp-sse.c
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-fcmp-sse.c
@ -27,7 +27,7 @@ __m128 test_cmpnleps(__m128 A, __m128 B) {
  // CIR:         } 

  // LLVM-LABEL: define dso_local <4 x float> @test_cmpnleps(
-  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #{{[0-9]+}} {
  // LLVM-NEXT:    [[TMP3:%.*]] = alloca <4 x float>, i64 1, align 16
  // LLVM-NEXT:    [[TMP4:%.*]] = alloca <4 x float>, i64 1, align 16
  // LLVM-NEXT:    [[TMP5:%.*]] = alloca <4 x float>, i64 1, align 16
@ -44,7 +44,7 @@ __m128 test_cmpnleps(__m128 A, __m128 B) {
  // LLVM-NEXT:    ret <4 x float> [[TMP12]]

  // OGCG-LABEL: define dso_local <4 x float> @test_cmpnleps(
-  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #{{[0-9]+}} {
  // OGCG-NEXT:  [[ENTRY:.*:]]
  // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
  // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
@ -78,7 +78,7 @@ __m128d test_cmpnlepd(__m128d A, __m128d B) {
  // CIR:         } 

  // LLVM-LABEL: define dso_local <2 x double> @test_cmpnlepd(
-  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) #{{[0-9]+}} {
  // LLVM-NEXT:    [[TMP3:%.*]] = alloca <2 x double>, i64 1, align 16
  // LLVM-NEXT:    [[TMP4:%.*]] = alloca <2 x double>, i64 1, align 16
  // LLVM-NEXT:    [[TMP5:%.*]] = alloca <2 x double>, i64 1, align 16
@ -95,7 +95,7 @@ __m128d test_cmpnlepd(__m128d A, __m128d B) {
  // LLVM-NEXT:    ret <2 x double> [[TMP12]]

  // OGCG-LABEL: define dso_local <2 x double> @test_cmpnlepd(
-  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #{{[0-9]+}} {
  // OGCG-NEXT:  [[ENTRY:.*:]]
  // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <2 x double>, align 16
  // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <2 x double>, align 16
@ -129,7 +129,7 @@ __m128 test_cmpnltps(__m128 A, __m128 B) {
  // CIR:         } 

  // LLVM-LABEL: define dso_local <4 x float> @test_cmpnltps(
-  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #{{[0-9]+}} {
  // LLVM-NEXT:    [[TMP3:%.*]] = alloca <4 x float>, i64 1, align 16
  // LLVM-NEXT:    [[TMP4:%.*]] = alloca <4 x float>, i64 1, align 16
  // LLVM-NEXT:    [[TMP5:%.*]] = alloca <4 x float>, i64 1, align 16
@ -146,7 +146,7 @@ __m128 test_cmpnltps(__m128 A, __m128 B) {
  // LLVM-NEXT:    ret <4 x float> [[TMP12]]

  // OGCG-LABEL: define dso_local <4 x float> @test_cmpnltps(
-  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #{{[0-9]+}} {
  // OGCG-NEXT:  [[ENTRY:.*:]]
  // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
  // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
@ -180,7 +180,7 @@ __m128d test_cmpnltpd(__m128d A, __m128d B) {
  // CIR:         } 

  // LLVM-LABEL: define dso_local <2 x double> @test_cmpnltpd(
-  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) #{{[0-9]+}} {
  // LLVM-NEXT:    [[TMP3:%.*]] = alloca <2 x double>, i64 1, align 16
  // LLVM-NEXT:    [[TMP4:%.*]] = alloca <2 x double>, i64 1, align 16
  // LLVM-NEXT:    [[TMP5:%.*]] = alloca <2 x double>, i64 1, align 16
@ -197,7 +197,7 @@ __m128d test_cmpnltpd(__m128d A, __m128d B) {
  // LLVM-NEXT:    ret <2 x double> [[TMP12]]

  // OGCG-LABEL: define dso_local <2 x double> @test_cmpnltpd(
-  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #{{[0-9]+}} {
  // OGCG-NEXT:  [[ENTRY:.*:]]
  // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <2 x double>, align 16
  // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <2 x double>, align 16
--- a/clang/test/CIR/CodeGenBuiltins/builtin-isfpclass.c
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-isfpclass.c
@ -40,8 +40,8 @@ void test_is_finite(__fp16 *H, float F, double D, long double LD) {
    // OGCG: call i1 @llvm.is.fpclass.f32(float {{.*}}, i32 504)

    res = finite(D);
-    // CIR: cir.call @finite(%{{.*}}) nothrow side_effect(const) : (!cir.double) -> !s32i
-    // LLVM: call i32 @finite(double {{.*}})
+    // CIR: cir.is_fp_class %{{.*}}, fcFinite : (!cir.double) -> !cir.bool
+    // LLVM: call i1 @llvm.is.fpclass.f64(double {{.*}}, i32 504)
    // OGCG: call i1 @llvm.is.fpclass.f64(double %20, i32 504)
    res = __builtin_isnormal(*H);
    // CIR: cir.is_fp_class %{{.*}}, fcNormal : (!cir.f16) -> !cir.bool