[clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (#90377)

This is a fix for the issue #87758 where fast-math flags are not propagated all builtins. It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins.
2024-05-04 17:47:48 +02:00 · 2024-05-04 17:47:48 +02:00 · cb015b9ec9
commit cb015b9ec9
parent 7ee6288312
3 changed files with 84 additions and 7 deletions
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
@ -704,6 +704,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {

 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
 }
@ -2660,7 +2661,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
    if (OP.hasMathErrnoOverride())
      ErrnoOverriden = OP.getMathErrnoOverride();
  }
-  // True if 'atttibute__((optnone)) is used. This attibute overrides
+  // True if 'attribute__((optnone))' is used. This attribute overrides
  // fast-math which implies math-errno.
  bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();

--- a/clang/test/CodeGen/math-errno.c
+++ b/clang/test/CodeGen/math-errno.c
@ -27,7 +27,7 @@ float f1(float x) {
 // CHECK: tail call float @sqrtf(float noundef {{.*}}) #[[ATTR4_O2:[0-9]+]]

 // FAST-LABEL: define {{.*}} nofpclass(nan inf) float @f1
-// FAST: call fast nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR3_FAST:[0-9]+]]
+// FAST: call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR3_FAST:[0-9]+]]

 // NOOPT-LABEL: define {{.*}} float @f1
 // NOOPT: call float @sqrtf(float noundef {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
@ -44,7 +44,7 @@ float f2(float x) {
 // FAST: call fast float @llvm.sqrt.f32(float {{.*}})

 // NOOPT-LABEL: define {{.*}} float @f2
-// NOOPT: call float @sqrtf(float {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
+// NOOPT: call fast float @sqrtf(float {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]

 __attribute__((optnone))
 float f3(float x) {
@ -56,7 +56,7 @@ float f3(float x) {
 // CHECK: call float @sqrtf(float noundef {{.*}})

 // FAST-LABEL: define {{.*}} nofpclass(nan inf) float @f3
-// FAST: call fast nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR4_FAST:[0-9]+]]
+// FAST: call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR4_FAST:[0-9]+]]

 // NOOPT-LABEL: define {{.*}} float @f3
 // NOOPT:  call float @sqrtf(float noundef %0) #[[ATTR4_NOOPT:[0-9]+]]
--- a/clang/test/CodeGen/pr87758.c
+++ b/clang/test/CodeGen/pr87758.c
@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// precise mode
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fmath-errno -ffp-contract=on \
+// RUN: -fno-rounding-math -emit-llvm  -o - %s | FileCheck \
+// RUN: --check-prefix=CHECK-PRECISE %s
+
+// fast mode
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast \
+// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-FAST %s
+
+// Reproducer for issue #87758
+// The testcase below verifies that the "fast" flag are set on the calls.
+
+float sqrtf(float x); // unary fp builtin
+float powf(float x, float y); // binary fp builtin
+float fmaf(float x, float y, float z); // ternary fp builtin
+char *rindex(const char *s, int c); // not a fp builtin
+
+#pragma float_control(push)
+#pragma float_control(precise, off)
+// CHECK: define dso_local float @fp_precise_off_libm_calls(
+// CHECK: call fast float @llvm.sqrt.f32(
+// CHECK: call fast float @llvm.pow.f32(
+// CHECK: call fast float @llvm.fma.f32(
+// CHECK: call ptr @rindex(
+
+// CHECK-PRECISE: define dso_local float @fp_precise_off_libm_calls(
+// CHECK-PRECISE: call fast float @sqrtf(
+// CHECK-PRECISE: call fast float @powf(
+// CHECK-PRECISE: call fast float @llvm.fma.f32(
+// CHECK-PRECISE: call ptr @rindex(
+
+// CHECK-FAST: define dso_local nofpclass(nan inf) float @fp_precise_off_libm_calls(
+// CHECK-FAST: call fast float @llvm.sqrt.f32(
+// CHECK-FAST: call fast float @llvm.pow.f32(
+// CHECK-FAST: call fast float @llvm.fma.f32(
+// CHECK-FAST: call ptr @rindex(
+
+float fp_precise_off_libm_calls(float a, float b, float c, const char *d, char *e, unsigned char f) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  e = rindex(d, 75);
+  return a;
+}
+#pragma float_control(pop)
+
+#pragma float_control(push)
+#pragma float_control(precise, on)
+// CHECK: define dso_local float @fp_precise_on_libm_calls(
+// CHECK: call float @sqrtf(
+// CHECK: call float @powf(
+// CHECK: call float @llvm.fma.f32(
+// CHECK: call ptr @rindex(
+
+// CHECK-PRECISE: define dso_local float @fp_precise_on_libm_calls(
+// CHECK-PRECISE: call float @sqrtf(
+// CHECK-PRECISE: call float @powf(
+// CHECK-PRECISE: call float @llvm.fma.f32(
+// CHECK-PRECISE: call ptr @rindex(
+
+// CHECK-FAST: define dso_local nofpclass(nan inf) float @fp_precise_on_libm_calls(
+// CHECK-FAST: call nofpclass(nan inf) float @sqrtf(
+// CHECK-FAST: call nofpclass(nan inf) float @powf(
+// CHECK-FAST: call float @llvm.fma.f32(
+// CHECK-FAST: call ptr @rindex(
+
+float fp_precise_on_libm_calls(float a, float b, float c, const char *d, char *e, unsigned char f) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  e = rindex(d, 75);
+  return a;
+}
+#pragma float_control(pop)