[AMDGPU] Add half vector support for table-driven libcall optimization (#178638)

When replacing certain AMDGPU library calls with constant data vectors, the existing implementation only handled single- and double-precision floats. This change extends the functionality to also support half-precision floats. Additionally, it refactors the function responsible for generating constant float data vectors to improve readability and reduce code duplication. In tandem with this refactoring, the patch relaxes the check for constant data vectors to include any constant of vector type. This allows other constant vectors to be processed, such as those created from constant aggregate zeros (e.g. `<2 x float> zeroinitializer`).

---------

Signed-off-by: Steffen Holst Larsen <sholstla@amd.com>
2026-03-04 08:42:44 +01:00 · 2026-03-04 08:42:44 +01:00 · 5b156a4372
commit 5b156a4372
parent 6b59ad6e8d
32 changed files with 2221 additions and 95 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@ -98,8 +98,8 @@ private:
  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);

  // evaluate calls if calls' arguments are constants.
-  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
-                              Constant *copr0, Constant *copr1);
+  bool evaluateScalarMathFunc(const FuncInfo &FInfo, APFloat &Res0,
+                              APFloat &Res1, Constant *copr0, Constant *copr1);
  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);

  /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
@ -801,6 +801,21 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
  return false;
 }

+/// Build a constant FP vector from \p Values, rounding each element (to
+/// nearest, ties to even) into the scalar element format of \p Ty.
+static Constant *getConstantFloatVector(const ArrayRef<APFloat> Values,
+                                        const Type *Ty) {
+  Type *ScalarTy = Ty->getScalarType();
+  const fltSemantics &TargetSem = ScalarTy->getFltSemantics();
+
+  SmallVector<Constant *, 4> Elements;
+  Elements.reserve(Values.size());
+  for (const APFloat &Source : Values) {
+    // convert() mutates in place, so work on a private copy of the element.
+    APFloat Rounded = Source;
+    bool LosesInfo;
+    Rounded.convert(TargetSem, APFloat::rmNearestTiesToEven, &LosesInfo);
+    Elements.push_back(ConstantFP::get(ScalarTy, Rounded));
+  }
+  return ConstantVector::get(Elements);
+}
+
 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  // Table-Driven optimization
  const TableRef tr = getOptTable(FInfo.getId());
@ -810,40 +825,26 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  int const sz = (int)tr.size();
  Value *opr0 = CI->getArgOperand(0);

-  if (getVecSize(FInfo) > 1) {
-    if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
-      SmallVector<double, 0> DVal;
-      for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
-        ConstantFP *eltval = dyn_cast<ConstantFP>(
-                               CV->getElementAsConstant((unsigned)eltNo));
-        assert(eltval && "Non-FP arguments in math function!");
-        bool found = false;
-        for (int i=0; i < sz; ++i) {
-          if (eltval->isExactlyValue(tr[i].input)) {
-            DVal.push_back(tr[i].result);
-            found = true;
-            break;
-          }
-        }
-        if (!found) {
-          // This vector constants not handled yet.
+  int vecSize = getVecSize(FInfo);
+  if (vecSize > 1) {
+    // Vector version: fold only when every lane is a ConstantFP found in the
+    // table.
+    Constant *CV = dyn_cast<Constant>(opr0);
+    if (CV && CV->getType()->isVectorTy()) {
+      SmallVector<APFloat, 4> Values;
+      Values.reserve(vecSize);
+      for (int eltNo = 0; eltNo < vecSize; ++eltNo) {
+        // Any vector constant is accepted above, so a lane may be undef/poison
+        // or unavailable (null, e.g. for a constant expression). Such lanes
+        // cannot be looked up; give up instead of asserting in cast<>.
+        auto *eltval = dyn_cast_or_null<ConstantFP>(
+            CV->getAggregateElement((unsigned)eltNo));
+        if (!eltval)
+          return false;
+        auto MatchingRow = llvm::find_if(tr, [eltval](const TableEntry &entry) {
+          return eltval->isExactlyValue(entry.input);
+        });
+        if (MatchingRow == tr.end())
          return false;
-        }
+        Values.push_back(APFloat(MatchingRow->result));
      }
-      LLVMContext &context = CI->getContext();
-      Constant *nval;
-      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
-        SmallVector<float, 0> FVal;
-        for (double D : DVal)
-          FVal.push_back((float)D);
-        ArrayRef<float> tmp(FVal);
-        nval = ConstantDataVector::get(context, tmp);
-      } else { // F64
-        ArrayRef<double> tmp(DVal);
-        nval = ConstantDataVector::get(context, tmp);
-      }
-      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
-      replaceCall(CI, nval);
+      Constant *NewValues = getConstantFloatVector(Values, CI->getType());
+      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *NewValues << "\n");
+      replaceCall(CI, NewValues);
      return true;
    }
  } else {
@ -1782,9 +1783,9 @@ bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
  return true;
 }

-bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
-                                            double &Res1, Constant *copr0,
-                                            Constant *copr1) {
+bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
+                                            APFloat &Res0, APFloat &Res1,
+                                            Constant *copr0, Constant *copr1) {
  // By default, opr0/opr1/opr3 holds values of float/double type.
  // If they are not float/double, each function has to its
  // operand separately.
@ -1804,125 +1805,127 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
  }

  switch (FInfo.getId()) {
-  default : return false;
+  default:
+    return false;

  case AMDGPULibFunc::EI_ACOS:
-    Res0 = acos(opr0);
+    Res0 = APFloat{acos(opr0)};
    return true;

  case AMDGPULibFunc::EI_ACOSH:
    // acosh(x) == log(x + sqrt(x*x - 1))
-    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
+    Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 - 1.0))};
    return true;

  case AMDGPULibFunc::EI_ACOSPI:
-    Res0 = acos(opr0) / MATH_PI;
+    Res0 = APFloat{acos(opr0) / MATH_PI};
    return true;

  case AMDGPULibFunc::EI_ASIN:
-    Res0 = asin(opr0);
+    Res0 = APFloat{asin(opr0)};
    return true;

  case AMDGPULibFunc::EI_ASINH:
    // asinh(x) == log(x + sqrt(x*x + 1))
-    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
+    Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 + 1.0))};
    return true;

  case AMDGPULibFunc::EI_ASINPI:
-    Res0 = asin(opr0) / MATH_PI;
+    Res0 = APFloat{asin(opr0) / MATH_PI};
    return true;

  case AMDGPULibFunc::EI_ATAN:
-    Res0 = atan(opr0);
+    Res0 = APFloat{atan(opr0)};
    return true;

  case AMDGPULibFunc::EI_ATANH:
-    // atanh(x) == (log(x+1) - log(x-1))/2;
-    Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
+    // atanh(x) == (log(1+x) - log(1-x))/2. Using log(x-1) instead would be
+    // NaN over the whole domain |x| < 1.
+    Res0 = APFloat{(log(1.0 + opr0) - log(1.0 - opr0)) / 2.0};
    return true;

  case AMDGPULibFunc::EI_ATANPI:
-    Res0 = atan(opr0) / MATH_PI;
+    Res0 = APFloat{atan(opr0) / MATH_PI};
    return true;

  case AMDGPULibFunc::EI_CBRT:
-    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
+    Res0 =
+        APFloat{(opr0 < 0.0) ? -pow(-opr0, 1.0 / 3.0) : pow(opr0, 1.0 / 3.0)};
    return true;

  case AMDGPULibFunc::EI_COS:
-    Res0 = cos(opr0);
+    Res0 = APFloat{cos(opr0)};
    return true;

  case AMDGPULibFunc::EI_COSH:
-    Res0 = cosh(opr0);
+    Res0 = APFloat{cosh(opr0)};
    return true;

  case AMDGPULibFunc::EI_COSPI:
-    Res0 = cos(MATH_PI * opr0);
+    Res0 = APFloat{cos(MATH_PI * opr0)};
    return true;

  case AMDGPULibFunc::EI_EXP:
-    Res0 = exp(opr0);
+    Res0 = APFloat{exp(opr0)};
    return true;

  case AMDGPULibFunc::EI_EXP2:
-    Res0 = pow(2.0, opr0);
+    Res0 = APFloat{pow(2.0, opr0)};
    return true;

  case AMDGPULibFunc::EI_EXP10:
-    Res0 = pow(10.0, opr0);
+    Res0 = APFloat{pow(10.0, opr0)};
    return true;

  case AMDGPULibFunc::EI_LOG:
-    Res0 = log(opr0);
+    Res0 = APFloat{log(opr0)};
    return true;

  case AMDGPULibFunc::EI_LOG2:
-    Res0 = log(opr0) / log(2.0);
+    Res0 = APFloat{log(opr0) / log(2.0)};
    return true;

  case AMDGPULibFunc::EI_LOG10:
-    Res0 = log(opr0) / log(10.0);
+    Res0 = APFloat{log(opr0) / log(10.0)};
    return true;

  case AMDGPULibFunc::EI_RSQRT:
-    Res0 = 1.0 / sqrt(opr0);
+    Res0 = APFloat{1.0 / sqrt(opr0)};
    return true;

  case AMDGPULibFunc::EI_SIN:
-    Res0 = sin(opr0);
+    Res0 = APFloat{sin(opr0)};
    return true;

  case AMDGPULibFunc::EI_SINH:
-    Res0 = sinh(opr0);
+    Res0 = APFloat{sinh(opr0)};
    return true;

  case AMDGPULibFunc::EI_SINPI:
-    Res0 = sin(MATH_PI * opr0);
+    Res0 = APFloat{sin(MATH_PI * opr0)};
    return true;

  case AMDGPULibFunc::EI_TAN:
-    Res0 = tan(opr0);
+    Res0 = APFloat{tan(opr0)};
    return true;

  case AMDGPULibFunc::EI_TANH:
-    Res0 = tanh(opr0);
+    Res0 = APFloat{tanh(opr0)};
    return true;

  case AMDGPULibFunc::EI_TANPI:
-    Res0 = tan(MATH_PI * opr0);
+    Res0 = APFloat{tan(MATH_PI * opr0)};
    return true;

  // two-arg functions
  case AMDGPULibFunc::EI_POW:
  case AMDGPULibFunc::EI_POWR:
-    Res0 = pow(opr0, opr1);
+    Res0 = APFloat{pow(opr0, opr1)};
    return true;

  case AMDGPULibFunc::EI_POWN: {
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
-      Res0 = pow(opr0, val);
+      Res0 = APFloat{pow(opr0, val)};
      return true;
    }
    return false;
@ -1931,7 +1934,7 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
  case AMDGPULibFunc::EI_ROOTN: {
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
-      Res0 = pow(opr0, 1.0 / val);
+      Res0 = APFloat{pow(opr0, 1.0 / val)};
      return true;
    }
    return false;
@ -1939,8 +1942,8 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,

  // with ptr arg
  case AMDGPULibFunc::EI_SINCOS:
-    Res0 = sin(opr0);
-    Res1 = cos(opr0);
+    Res0 = APFloat{sin(opr0)};
+    Res1 = APFloat{cos(opr0)};
    return true;
  }

@ -1969,11 +1972,12 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
  // At this point, all arguments to aCI are constants.

  // max vector size is 16, and sincos will generate two results.
-  double DVal0[16], DVal1[16];
+  SmallVector<APFloat, 16> Val0, Val1;
  int FuncVecSize = getVecSize(FInfo);
  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
  if (FuncVecSize == 1) {
-    if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
+    if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
+                                Val1.emplace_back(0.0), copr0, copr1)) {
      return false;
    }
  } else {
@ -1982,39 +1986,22 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
    for (int i = 0; i < FuncVecSize; ++i) {
      Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
      Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
-      if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
+      if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
+                                  Val1.emplace_back(0.0), celt0, celt1)) {
        return false;
      }
    }
  }

-  LLVMContext &context = aCI->getContext();
  Constant *nval0, *nval1;
  if (FuncVecSize == 1) {
-    nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
+    // Val0/Val1 were built from doubles, but ConstantFP::get(Type *, APFloat)
+    // requires semantics that already match the type. Go through the double
+    // overload so the value is rounded to the call's result type (half/float).
+    nval0 = ConstantFP::get(aCI->getType(), Val0[0].convertToDouble());
    if (hasTwoResults)
-      nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
+      nval1 = ConstantFP::get(aCI->getType(), Val1[0].convertToDouble());
  } else {
-    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
-      SmallVector <float, 0> FVal0, FVal1;
-      for (int i = 0; i < FuncVecSize; ++i)
-        FVal0.push_back((float)DVal0[i]);
-      ArrayRef<float> tmp0(FVal0);
-      nval0 = ConstantDataVector::get(context, tmp0);
-      if (hasTwoResults) {
-        for (int i = 0; i < FuncVecSize; ++i)
-          FVal1.push_back((float)DVal1[i]);
-        ArrayRef<float> tmp1(FVal1);
-        nval1 = ConstantDataVector::get(context, tmp1);
-      }
-    } else {
-      ArrayRef<double> tmp0(DVal0);
-      nval0 = ConstantDataVector::get(context, tmp0);
-      if (hasTwoResults) {
-        ArrayRef<double> tmp1(DVal1);
-        nval1 = ConstantDataVector::get(context, tmp1);
-      }
-    }
+    nval0 = getConstantFloatVector(Val0, aCI->getType());
+    if (hasTwoResults)
+      nval1 = getConstantFloatVector(Val1, aCI->getType());
  }

  if (hasTwoResults) {
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acos.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_acos() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_acos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0x3FF921FB60000000
+;
+entry:
+  %c = call float @_Z4acosf(float 0.000000e+00)
+  ret float %c
+}
+
+define <4 x float> @test_tdo_v2_f32_acos() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_acos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 0x400921FB60000000>
+;
+entry:
+  %c = call <4 x float> @_Z4acosDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+define half @test_tdo_scalar_f16_acos() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_acos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3E48
+;
+entry:
+  %c = call half @_Z4acosDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <4 x half> @test_tdo_v2_f16_acos() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_acos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH3E48, half 0xH3E48, half 0xH0000, half 0xH4248>
+;
+entry:
+  %c = call <4 x half> @_Z4acosDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+define double @test_tdo_scalar_f64_acos() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_acos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0x3FF921FB54442D18
+;
+entry:
+  %c = call double @_Z4acosd(double 0.000000e+00)
+  ret double %c
+}
+
+define <4 x double> @test_tdo_v2_f64_acos() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_acos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 0x3FF921FB54442D18, double 0x3FF921FB54442D18, double 0.000000e+00, double 0x400921FB54442D18>
+;
+entry:
+  %c = call <4 x double> @_Z4acosDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z4acosf(float)
+declare <4 x float>  @_Z4acosDv4_f(<4 x float>)
+declare half         @_Z4acosDh(half)
+declare <4 x half>   @_Z4acosDv4_Dh(<4 x half>)
+declare double       @_Z4acosd(double)
+declare <4 x double> @_Z4acosDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acosh.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acosh.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_acosh() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_acosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5acoshf(float 1.000000e+00)
+  ret float %c
+}
+
+define <2 x float> @test_tdo_v2_f32_acosh() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_acosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z5acoshDv2_f(<2 x float> <float 1.000000e+00, float 1.000000e+00>)
+  ret <2 x float> %c
+}
+
+define half @test_tdo_scalar_f16_acosh() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_acosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5acoshDh(half 1.000000e+00)
+  ret half %c
+}
+
+define <2 x half> @test_tdo_v2_f16_acosh() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_acosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z5acoshDv2_Dh(<2 x half> <half 1.000000e+00, half 1.000000e+00>)
+  ret <2 x half> %c
+}
+
+define double @test_tdo_scalar_f64_acosh() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_acosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5acoshd(double 1.000000e+00)
+  ret double %c
+}
+
+define <2 x double> @test_tdo_v2_f64_acosh() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_acosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z5acoshDv2_d(<2 x double> <double 1.000000e+00, double 1.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5acoshf(float)
+declare <2 x float>  @_Z5acoshDv2_f(<2 x float>)
+declare half         @_Z5acoshDh(half)
+declare <2 x half>   @_Z5acoshDv2_Dh(<2 x half>)
+declare double       @_Z5acoshd(double)
+declare <2 x double> @_Z5acoshDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acospi.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-acospi.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_acospi() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_acospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 5.000000e-01
+;
+entry:
+  %c = call float @_Z6acospif(float 0.000000e+00)
+  ret float %c
+}
+
+define <4 x float> @test_tdo_v2_f32_acospi() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_acospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 5.000000e-01, float 5.000000e-01, float 0.000000e+00, float 1.000000e+00>
+;
+entry:
+  %c = call <4 x float> @_Z6acospiDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+define half @test_tdo_scalar_f16_acospi() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_acospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3800
+;
+entry:
+  %c = call half @_Z6acospiDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <4 x half> @test_tdo_v2_f16_acospi() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_acospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH3800, half 0xH3800, half 0xH0000, half 0xH3C00>
+;
+entry:
+  %c = call <4 x half> @_Z6acospiDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+define double @test_tdo_scalar_f64_acospi() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_acospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 5.000000e-01
+;
+entry:
+  %c = call double @_Z6acospid(double 0.000000e+00)
+  ret double %c
+}
+
+define <4 x double> @test_tdo_v2_f64_acospi() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_acospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 5.000000e-01, double 5.000000e-01, double 0.000000e+00, double 1.000000e+00>
+;
+entry:
+  %c = call <4 x double> @_Z6acospiDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z6acospif(float)
+declare <4 x float>  @_Z6acospiDv4_f(<4 x float>)
+declare half         @_Z6acospiDh(half)
+declare <4 x half>   @_Z6acospiDv4_Dh(<4 x half>)
+declare double       @_Z6acospid(double)
+declare <4 x double> @_Z6acospiDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-asin.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-asin.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_asin() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_asin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z4asinf(float 0.000000e+00)
+  ret float %c
+}
+
+define <4 x float> @test_tdo_v2_f32_asin() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_asin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0x3FF921FB60000000, float 0xBFF921FB60000000>
+;
+entry:
+  %c = call <4 x float> @_Z4asinDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+define half @test_tdo_scalar_f16_asin() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_asin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z4asinDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <4 x half> @test_tdo_v2_f16_asin() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_asin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH0000, half 0xH8000, half 0xH3E48, half 0xHBE48>
+;
+entry:
+  %c = call <4 x half> @_Z4asinDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+define double @test_tdo_scalar_f64_asin() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_asin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z4asind(double 0.000000e+00)
+  ret double %c
+}
+
+define <4 x double> @test_tdo_v2_f64_asin() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_asin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 0.000000e+00, double -0.000000e+00, double 0x3FF921FB54442D18, double 0xBFF921FB54442D18>
+;
+entry:
+  %c = call <4 x double> @_Z4asinDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z4asinf(float)
+declare <4 x float>  @_Z4asinDv4_f(<4 x float>)
+declare half         @_Z4asinDh(half)
+declare <4 x half>   @_Z4asinDv4_Dh(<4 x half>)
+declare double       @_Z4asind(double)
+declare <4 x double> @_Z4asinDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-asinh.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-asinh.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_asinh() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_asinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5asinhf(float 0.000000e+00)
+  ret float %c
+}
+
+define <2 x float> @test_tdo_v2_f32_asinh() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_asinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> <float 0.000000e+00, float -0.000000e+00>
+;
+entry:
+  %c = call <2 x float> @_Z5asinhDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+define half @test_tdo_scalar_f16_asinh() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_asinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5asinhDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <2 x half> @test_tdo_v2_f16_asinh() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_asinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> <half 0xH0000, half 0xH8000>
+;
+entry:
+  %c = call <2 x half> @_Z5asinhDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+define double @test_tdo_scalar_f64_asinh() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_asinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5asinhd(double 0.000000e+00)
+  ret double %c
+}
+
+define <2 x double> @test_tdo_v2_f64_asinh() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_asinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> <double 0.000000e+00, double -0.000000e+00>
+;
+entry:
+  %c = call <2 x double> @_Z5asinhDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5asinhf(float)
+declare <2 x float>  @_Z5asinhDv2_f(<2 x float>)
+declare half         @_Z5asinhDh(half)
+declare <2 x half>   @_Z5asinhDv2_Dh(<2 x half>)
+declare double       @_Z5asinhd(double)
+declare <2 x double> @_Z5asinhDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-asinpi.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-asinpi.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_asinpi() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_asinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z6asinpif(float 0.000000e+00)
+  ret float %c
+}
+
+define <4 x float> @test_tdo_v2_f32_asinpi() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_asinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 5.000000e-01, float -5.000000e-01>
+;
+entry:
+  %c = call <4 x float> @_Z6asinpiDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+define half @test_tdo_scalar_f16_asinpi() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_asinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z6asinpiDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <4 x half> @test_tdo_v2_f16_asinpi() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_asinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH0000, half 0xH8000, half 0xH3800, half 0xHB800>
+;
+entry:
+  %c = call <4 x half> @_Z6asinpiDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+define double @test_tdo_scalar_f64_asinpi() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_asinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z6asinpid(double 0.000000e+00)
+  ret double %c
+}
+
+define <4 x double> @test_tdo_v2_f64_asinpi() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_asinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 0.000000e+00, double -0.000000e+00, double 5.000000e-01, double -5.000000e-01>
+;
+entry:
+  %c = call <4 x double> @_Z6asinpiDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z6asinpif(float)
+declare <4 x float>  @_Z6asinpiDv4_f(<4 x float>)
+declare half         @_Z6asinpiDh(half)
+declare <4 x half>   @_Z6asinpiDv4_Dh(<4 x half>)
+declare double       @_Z6asinpid(double)
+declare <4 x double> @_Z6asinpiDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-atan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-atan.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_atan() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_atan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z4atanf(float 0.000000e+00)
+  ret float %c
+}
+
+define <4 x float> @test_tdo_v2_f32_atan() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_atan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0x3FE921FB60000000, float 0xBFE921FB60000000>
+;
+entry:
+  %c = call <4 x float> @_Z4atanDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+define half @test_tdo_scalar_f16_atan() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_atan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z4atanDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <4 x half> @test_tdo_v2_f16_atan() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_atan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH0000, half 0xH8000, half 0xH3A48, half 0xHBA48>
+;
+entry:
+  %c = call <4 x half> @_Z4atanDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+define double @test_tdo_scalar_f64_atan() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_atan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z4atand(double 0.000000e+00)
+  ret double %c
+}
+
+define <4 x double> @test_tdo_v2_f64_atan() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_atan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 0.000000e+00, double -0.000000e+00, double 0x3FE921FB54442D18, double 0xBFE921FB54442D18>
+;
+entry:
+  %c = call <4 x double> @_Z4atanDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z4atanf(float)
+declare <4 x float>  @_Z4atanDv4_f(<4 x float>)
+declare half         @_Z4atanDh(half)
+declare <4 x half>   @_Z4atanDv4_Dh(<4 x half>)
+declare double       @_Z4atand(double)
+declare <4 x double> @_Z4atanDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-atanh.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-atanh.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+define float @test_tdo_scalar_f32_atanh() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_atanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5atanhf(float 0.000000e+00)
+  ret float %c
+}
+
+define <2 x float> @test_tdo_v2_f32_atanh() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_atanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> <float 0.000000e+00, float -0.000000e+00>
+;
+entry:
+  %c = call <2 x float> @_Z5atanhDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+define half @test_tdo_scalar_f16_atanh() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_atanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5atanhDh(half 0.000000e+00)
+  ret half %c
+}
+
+define <2 x half> @test_tdo_v2_f16_atanh() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_atanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> <half 0xH0000, half 0xH8000>
+;
+entry:
+  %c = call <2 x half> @_Z5atanhDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+define double @test_tdo_scalar_f64_atanh() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_atanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5atanhd(double 0.000000e+00)
+  ret double %c
+}
+
+define <2 x double> @test_tdo_v2_f64_atanh() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_atanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> <double 0.000000e+00, double -0.000000e+00>
+;
+entry:
+  %c = call <2 x double> @_Z5atanhDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5atanhf(float)
+declare <2 x float>  @_Z5atanhDv2_f(<2 x float>)
+declare half         @_Z5atanhDh(half)
+declare <2 x half>   @_Z5atanhDv2_Dh(<2 x half>)
+declare double       @_Z5atanhd(double)
+declare <2 x double> @_Z5atanhDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-atanpi.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-atanpi.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: atanpi(+0.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define float @test_tdo_scalar_f32_atanpi() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_atanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z6atanpif(float 0.000000e+00)
+  ret float %c
+}
+
+; <4 x float> case: atanpi of <+0.0, -0.0, 1.0, -1.0> must fold to the constant
+; vector <+0.0, -0.0, 0.25, -0.25>.
+; NOTE(review): the name says "v2" but the operand is a 4-element vector;
+; consider renaming to test_tdo_v4_f32_atanpi.
+define <4 x float> @test_tdo_v2_f32_atanpi() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_atanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 2.500000e-01, float -2.500000e-01>
+;
+entry:
+  %c = call <4 x float> @_Z6atanpiDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+; Scalar half case: atanpi(+0.0) with a constant operand must fold to +0.0
+; (printed as 0xH0000), leaving no call behind.
+define half @test_tdo_scalar_f16_atanpi() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_atanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z6atanpiDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <4 x half> case: atanpi of <+0.0, -0.0, 1.0, -1.0> must fold to
+; <+0.0, -0.0, 0.25, -0.25> (0xH0000, 0xH8000, 0xH3400, 0xHB400).
+; NOTE(review): the name says "v2" but the operand is a 4-element vector;
+; consider renaming to test_tdo_v4_f16_atanpi.
+define <4 x half> @test_tdo_v2_f16_atanpi() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_atanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH0000, half 0xH8000, half 0xH3400, half 0xHB400>
+;
+entry:
+  %c = call <4 x half> @_Z6atanpiDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+; Scalar double case: atanpi(+0.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define double @test_tdo_scalar_f64_atanpi() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_atanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z6atanpid(double 0.000000e+00)
+  ret double %c
+}
+
+; <4 x double> case: atanpi of <+0.0, -0.0, 1.0, -1.0> must fold to the constant
+; vector <+0.0, -0.0, 0.25, -0.25>.
+; NOTE(review): the name says "v2" but the operand is a 4-element vector;
+; consider renaming to test_tdo_v4_f64_atanpi.
+define <4 x double> @test_tdo_v2_f64_atanpi() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_atanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 0.000000e+00, double -0.000000e+00, double 2.500000e-01, double -2.500000e-01>
+;
+entry:
+  %c = call <4 x double> @_Z6atanpiDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z6atanpif(float)
+declare <4 x float>  @_Z6atanpiDv4_f(<4 x float>)
+declare half         @_Z6atanpiDh(half)
+declare <4 x half>   @_Z6atanpiDv4_Dh(<4 x half>)
+declare double       @_Z6atanpid(double)
+declare <4 x double> @_Z6atanpiDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cbrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cbrt.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: cbrt(1.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_cbrt() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_cbrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z4cbrtf(float 1.000000e+00)
+  ret float %c
+}
+
+; <4 x float> case: cbrt of <+0.0, -0.0, 1.0, -1.0> must fold to the same
+; constant vector <+0.0, -0.0, 1.0, -1.0> (signs are kept per lane).
+; NOTE(review): the name says "v2" but the operand is a 4-element vector;
+; consider renaming to test_tdo_v4_f32_cbrt.
+define <4 x float> @test_tdo_v2_f32_cbrt() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_cbrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>
+;
+entry:
+  %c = call <4 x float> @_Z4cbrtDv4_f(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>)
+  ret <4 x float> %c
+}
+
+; Scalar half case: cbrt(1.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_cbrt() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_cbrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z4cbrtDh(half 1.000000e+00)
+  ret half %c
+}
+
+; <4 x half> case: cbrt of <+0.0, -0.0, 1.0, -1.0> must fold to
+; <+0.0, -0.0, 1.0, -1.0> (0xH0000, 0xH8000, 0xH3C00, 0xHBC00).
+; NOTE(review): the name says "v2" but the operand is a 4-element vector;
+; consider renaming to test_tdo_v4_f16_cbrt.
+define <4 x half> @test_tdo_v2_f16_cbrt() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_cbrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH0000, half 0xH8000, half 0xH3C00, half 0xHBC00>
+;
+entry:
+  %c = call <4 x half> @_Z4cbrtDv4_Dh(<4 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00, half -1.000000e+00>)
+  ret <4 x half> %c
+}
+
+; Scalar double case: cbrt(1.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_cbrt() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_cbrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z4cbrtd(double 1.000000e+00)
+  ret double %c
+}
+
+; <4 x double> case: cbrt of <+0.0, -0.0, 1.0, -1.0> must fold to the same
+; constant vector <+0.0, -0.0, 1.0, -1.0> (signs are kept per lane).
+; NOTE(review): the name says "v2" but the operand is a 4-element vector;
+; consider renaming to test_tdo_v4_f64_cbrt.
+define <4 x double> @test_tdo_v2_f64_cbrt() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_cbrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>
+;
+entry:
+  %c = call <4 x double> @_Z4cbrtDv4_d(<4 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00, double -1.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z4cbrtf(float)
+declare <4 x float>  @_Z4cbrtDv4_f(<4 x float>)
+declare half         @_Z4cbrtDh(half)
+declare <4 x half>   @_Z4cbrtDv4_Dh(<4 x half>)
+declare double       @_Z4cbrtd(double)
+declare <4 x double> @_Z4cbrtDv4_d(<4 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cos.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: cos(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_cos() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_cos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z3cosf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: cos of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x float> @test_tdo_v2_f32_cos() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_cos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
+;
+entry:
+  %c = call <2 x float> @_Z3cosDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: cos(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_cos() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_cos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z3cosDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: cos of <+0.0, -0.0> must fold to 1.0 (0xH3C00) in both
+; lanes, which is printed as a splat constant.
+define <2 x half> @test_tdo_v2_f16_cos() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_cos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> splat (half 0xH3C00)
+;
+entry:
+  %c = call <2 x half> @_Z3cosDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: cos(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_cos() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_cos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z3cosd(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: cos of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x double> @test_tdo_v2_f64_cos() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_cos() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
+;
+entry:
+  %c = call <2 x double> @_Z3cosDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z3cosf(float)
+declare <2 x float>  @_Z3cosDv2_f(<2 x float>)
+declare half         @_Z3cosDh(half)
+declare <2 x half>   @_Z3cosDv2_Dh(<2 x half>)
+declare double       @_Z3cosd(double)
+declare <2 x double> @_Z3cosDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cosh.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cosh.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: cosh(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_cosh() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_cosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z4coshf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: cosh of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x float> @test_tdo_v2_f32_cosh() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_cosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
+;
+entry:
+  %c = call <2 x float> @_Z4coshDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: cosh(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_cosh() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_cosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z4coshDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: cosh of <+0.0, -0.0> must fold to 1.0 (0xH3C00) in both
+; lanes, which is printed as a splat constant.
+define <2 x half> @test_tdo_v2_f16_cosh() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_cosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> splat (half 0xH3C00)
+;
+entry:
+  %c = call <2 x half> @_Z4coshDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: cosh(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_cosh() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_cosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z4coshd(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: cosh of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x double> @test_tdo_v2_f64_cosh() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_cosh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
+;
+entry:
+  %c = call <2 x double> @_Z4coshDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z4coshf(float)
+declare <2 x float>  @_Z4coshDv2_f(<2 x float>)
+declare half         @_Z4coshDh(half)
+declare <2 x half>   @_Z4coshDv2_Dh(<2 x half>)
+declare double       @_Z4coshd(double)
+declare <2 x double> @_Z4coshDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cospi.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-cospi.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: cospi(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_cospi() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_cospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z5cospif(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: cospi of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x float> @test_tdo_v2_f32_cospi() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_cospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
+;
+entry:
+  %c = call <2 x float> @_Z5cospiDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: cospi(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_cospi() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_cospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z5cospiDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: cospi of <+0.0, -0.0> must fold to 1.0 (0xH3C00) in both
+; lanes, which is printed as a splat constant.
+define <2 x half> @test_tdo_v2_f16_cospi() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_cospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> splat (half 0xH3C00)
+;
+entry:
+  %c = call <2 x half> @_Z5cospiDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: cospi(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_cospi() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_cospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z5cospid(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: cospi of <+0.0, -0.0> must fold to 1.0 in both lanes,
+; which is printed as a splat constant.
+define <2 x double> @test_tdo_v2_f64_cospi() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_cospi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
+;
+entry:
+  %c = call <2 x double> @_Z5cospiDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5cospif(float)
+declare <2 x float>  @_Z5cospiDv2_f(<2 x float>)
+declare half         @_Z5cospiDh(half)
+declare <2 x half>   @_Z5cospiDv2_Dh(<2 x half>)
+declare double       @_Z5cospid(double)
+declare <2 x double> @_Z5cospiDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-erf.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-erf.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: erf(+0.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define float @test_tdo_scalar_f32_erf() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_erf() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z3erff(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: erf of the constant vector <+0.0, -0.0> must fold to the
+; constant vector <+0.0, -0.0> (the sign of each zero lane is kept).
+define <2 x float> @test_tdo_v2_f32_erf() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_erf() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> <float 0.000000e+00, float -0.000000e+00>
+;
+entry:
+  %c = call <2 x float> @_Z3erfDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: erf(+0.0) with a constant operand must fold to +0.0
+; (printed as 0xH0000), leaving no call behind.
+define half @test_tdo_scalar_f16_erf() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_erf() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z3erfDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: erf of <+0.0, -0.0> must fold to <+0.0, -0.0>
+; (0xH0000 is +0.0 and 0xH8000 is -0.0 in half hex notation).
+define <2 x half> @test_tdo_v2_f16_erf() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_erf() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> <half 0xH0000, half 0xH8000>
+;
+entry:
+  %c = call <2 x half> @_Z3erfDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: erf(+0.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define double @test_tdo_scalar_f64_erf() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_erf() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z3erfd(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: erf of the constant vector <+0.0, -0.0> must fold to the
+; constant vector <+0.0, -0.0> (the sign of each zero lane is kept).
+define <2 x double> @test_tdo_v2_f64_erf() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_erf() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> <double 0.000000e+00, double -0.000000e+00>
+;
+entry:
+  %c = call <2 x double> @_Z3erfDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z3erff(float)
+declare <2 x float>  @_Z3erfDv2_f(<2 x float>)
+declare half         @_Z3erfDh(half)
+declare <2 x half>   @_Z3erfDv2_Dh(<2 x half>)
+declare double       @_Z3erfd(double)
+declare <2 x double> @_Z3erfDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-erfc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-erfc.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: erfc(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_erfc() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_erfc() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z4erfcf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: erfc of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x float> @test_tdo_v2_f32_erfc() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_erfc() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
+;
+entry:
+  %c = call <2 x float> @_Z4erfcDv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: erfc(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_erfc() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_erfc() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z4erfcDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: erfc of <+0.0, -0.0> must fold to 1.0 (0xH3C00) in both
+; lanes, which is printed as a splat constant.
+define <2 x half> @test_tdo_v2_f16_erfc() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_erfc() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> splat (half 0xH3C00)
+;
+entry:
+  %c = call <2 x half> @_Z4erfcDv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: erfc(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_erfc() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_erfc() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z4erfcd(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: erfc of <+0.0, -0.0> must fold to 1.0 in both lanes, which
+; is printed as a splat constant.
+define <2 x double> @test_tdo_v2_f64_erfc() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_erfc() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
+;
+entry:
+  %c = call <2 x double> @_Z4erfcDv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z4erfcf(float)
+declare <2 x float>  @_Z4erfcDv2_f(<2 x float>)
+declare half         @_Z4erfcDh(half)
+declare <2 x half>   @_Z4erfcDv2_Dh(<2 x half>)
+declare double       @_Z4erfcd(double)
+declare <2 x double> @_Z4erfcDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-exp.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: exp(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_exp() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_exp() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z3expf(float 0.000000e+00)
+  ret float %c
+}
+
+; <3 x float> case: exp of <+0.0, -0.0, 1.0> must fold to <1.0, 1.0, e>, where
+; 0x4005BF0A80000000 is e (~2.71828) rounded to float precision.
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f32_exp.
+define <3 x float> @test_tdo_v2_f32_exp() {
+; CHECK-LABEL: define <3 x float> @test_tdo_v2_f32_exp() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0x4005BF0A80000000>
+;
+entry:
+  %c = call <3 x float> @_Z3expDv3_f(<3 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00>)
+  ret <3 x float> %c
+}
+
+; Scalar half case: exp(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_exp() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_exp() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z3expDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <3 x half> case: exp of <+0.0, -0.0, 1.0> must fold to <1.0, 1.0, e>, where
+; 0xH3C00 is 1.0 and 0xH4170 is 2.71875 (e at half precision).
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f16_exp.
+define <3 x half> @test_tdo_v2_f16_exp() {
+; CHECK-LABEL: define <3 x half> @test_tdo_v2_f16_exp() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH4170>
+;
+entry:
+  %c = call <3 x half> @_Z3expDv3_Dh(<3 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00>)
+  ret <3 x half> %c
+}
+
+; Scalar double case: exp(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_exp() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_exp() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z3expd(double 0.000000e+00)
+  ret double %c
+}
+
+; <3 x double> case: exp of <+0.0, -0.0, 1.0> must fold to <1.0, 1.0, e>, where
+; 0x4005BF0A8B145769 is the double closest to e (~2.718281828).
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f64_exp.
+define <3 x double> @test_tdo_v2_f64_exp() {
+; CHECK-LABEL: define <3 x double> @test_tdo_v2_f64_exp() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x double> <double 1.000000e+00, double 1.000000e+00, double 0x4005BF0A8B145769>
+;
+entry:
+  %c = call <3 x double> @_Z3expDv3_d(<3 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00>)
+  ret <3 x double> %c
+}
+
+declare float        @_Z3expf(float)
+declare <3 x float>  @_Z3expDv3_f(<3 x float>)
+declare half         @_Z3expDh(half)
+declare <3 x half>   @_Z3expDv3_Dh(<3 x half>)
+declare double       @_Z3expd(double)
+declare <3 x double> @_Z3expDv3_d(<3 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-exp10.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: exp10(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_exp10() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_exp10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z5exp10f(float 0.000000e+00)
+  ret float %c
+}
+
+; <3 x float> case: exp10 of <+0.0, -0.0, 1.0> must fold to the constant vector
+; <1.0, 1.0, 10.0>.
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f32_exp10.
+define <3 x float> @test_tdo_v2_f32_exp10() {
+; CHECK-LABEL: define <3 x float> @test_tdo_v2_f32_exp10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+01>
+;
+entry:
+  %c = call <3 x float> @_Z5exp10Dv3_f(<3 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00>)
+  ret <3 x float> %c
+}
+
+; Scalar half case: exp10(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_exp10() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_exp10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z5exp10Dh(half 0.000000e+00)
+  ret half %c
+}
+
+; <3 x half> case: exp10 of <+0.0, -0.0, 1.0> must fold to <1.0, 1.0, 10.0>
+; (0xH3C00 is 1.0 and 0xH4900 is 10.0 in half hex notation).
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f16_exp10.
+define <3 x half> @test_tdo_v2_f16_exp10() {
+; CHECK-LABEL: define <3 x half> @test_tdo_v2_f16_exp10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH4900>
+;
+entry:
+  %c = call <3 x half> @_Z5exp10Dv3_Dh(<3 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00>)
+  ret <3 x half> %c
+}
+
+; Scalar double case: exp10(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_exp10() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_exp10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z5exp10d(double 0.000000e+00)
+  ret double %c
+}
+
+; <3 x double> case: exp10 of <+0.0, -0.0, 1.0> must fold to the constant
+; vector <1.0, 1.0, 10.0>.
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f64_exp10.
+define <3 x double> @test_tdo_v2_f64_exp10() {
+; CHECK-LABEL: define <3 x double> @test_tdo_v2_f64_exp10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+01>
+;
+entry:
+  %c = call <3 x double> @_Z5exp10Dv3_d(<3 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00>)
+  ret <3 x double> %c
+}
+
+declare float        @_Z5exp10f(float)
+declare <3 x float>  @_Z5exp10Dv3_f(<3 x float>)
+declare half         @_Z5exp10Dh(half)
+declare <3 x half>   @_Z5exp10Dv3_Dh(<3 x half>)
+declare double       @_Z5exp10d(double)
+declare <3 x double> @_Z5exp10Dv3_d(<3 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-exp2.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: exp2(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define float @test_tdo_scalar_f32_exp2() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_exp2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z4exp2f(float 0.000000e+00)
+  ret float %c
+}
+
+; <3 x float> case: exp2 of <+0.0, -0.0, 1.0> must fold to the constant vector
+; <1.0, 1.0, 2.0>.
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f32_exp2.
+define <3 x float> @test_tdo_v2_f32_exp2() {
+; CHECK-LABEL: define <3 x float> @test_tdo_v2_f32_exp2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
+;
+entry:
+  %c = call <3 x float> @_Z4exp2Dv3_f(<3 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00>)
+  ret <3 x float> %c
+}
+
+; Scalar half case: exp2(+0.0) with a constant operand must fold to 1.0
+; (printed as 0xH3C00), leaving no call behind.
+define half @test_tdo_scalar_f16_exp2() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_exp2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z4exp2Dh(half 0.000000e+00)
+  ret half %c
+}
+
+; <3 x half> case: exp2 of <+0.0, -0.0, 1.0> must fold to <1.0, 1.0, 2.0>
+; (0xH3C00 is 1.0 and 0xH4000 is 2.0 in half hex notation).
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f16_exp2.
+define <3 x half> @test_tdo_v2_f16_exp2() {
+; CHECK-LABEL: define <3 x half> @test_tdo_v2_f16_exp2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH4000>
+;
+entry:
+  %c = call <3 x half> @_Z4exp2Dv3_Dh(<3 x half> <half 0.000000e+00, half -0.000000e+00, half 1.000000e+00>)
+  ret <3 x half> %c
+}
+
+; Scalar double case: exp2(+0.0) with a constant operand must fold to the
+; constant 1.0, leaving no call behind.
+define double @test_tdo_scalar_f64_exp2() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_exp2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z4exp2d(double 0.000000e+00)
+  ret double %c
+}
+
+; <3 x double> case: exp2 of <+0.0, -0.0, 1.0> must fold to the constant vector
+; <1.0, 1.0, 2.0>.
+; NOTE(review): the name says "v2" but the operand is a 3-element vector;
+; consider renaming to test_tdo_v3_f64_exp2.
+define <3 x double> @test_tdo_v2_f64_exp2() {
+; CHECK-LABEL: define <3 x double> @test_tdo_v2_f64_exp2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x double> <double 1.000000e+00, double 1.000000e+00, double 2.000000e+00>
+;
+entry:
+  %c = call <3 x double> @_Z4exp2Dv3_d(<3 x double> <double 0.000000e+00, double -0.000000e+00, double 1.000000e+00>)
+  ret <3 x double> %c
+}
+
+declare float        @_Z4exp2f(float)
+declare <3 x float>  @_Z4exp2Dv3_f(<3 x float>)
+declare half         @_Z4exp2Dh(half)
+declare <3 x half>   @_Z4exp2Dv3_Dh(<3 x half>)
+declare double       @_Z4exp2d(double)
+declare <3 x double> @_Z4exp2Dv3_d(<3 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-expm1.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-expm1.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: expm1(+0.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define float @test_tdo_scalar_f32_expm1() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_expm1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5expm1f(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: expm1 of the constant vector <+0.0, -0.0> must fold to the
+; constant vector <+0.0, -0.0> (the sign of each zero lane is kept).
+define <2 x float> @test_tdo_v2_f32_expm1() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_expm1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> <float 0.000000e+00, float -0.000000e+00>
+;
+entry:
+  %c = call <2 x float> @_Z5expm1Dv2_f(<2 x float> <float 0.000000e+00, float -0.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: expm1(+0.0) with a constant operand must fold to +0.0
+; (printed as 0xH0000), leaving no call behind.
+define half @test_tdo_scalar_f16_expm1() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_expm1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5expm1Dh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: expm1 of <+0.0, -0.0> must fold to <+0.0, -0.0>
+; (0xH0000 is +0.0 and 0xH8000 is -0.0 in half hex notation).
+define <2 x half> @test_tdo_v2_f16_expm1() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_expm1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> <half 0xH0000, half 0xH8000>
+;
+entry:
+  %c = call <2 x half> @_Z5expm1Dv2_Dh(<2 x half> <half 0.000000e+00, half -0.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: expm1(+0.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define double @test_tdo_scalar_f64_expm1() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_expm1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5expm1d(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: expm1 of the constant vector <+0.0, -0.0> must fold to the
+; constant vector <+0.0, -0.0> (the sign of each zero lane is kept).
+define <2 x double> @test_tdo_v2_f64_expm1() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_expm1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> <double 0.000000e+00, double -0.000000e+00>
+;
+entry:
+  %c = call <2 x double> @_Z5expm1Dv2_d(<2 x double> <double 0.000000e+00, double -0.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5expm1f(float)
+declare <2 x float>  @_Z5expm1Dv2_f(<2 x float>)
+declare half         @_Z5expm1Dh(half)
+declare <2 x half>   @_Z5expm1Dv2_Dh(<2 x half>)
+declare double       @_Z5expm1d(double)
+declare <2 x double> @_Z5expm1Dv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-log.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-log.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: log(1.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define float @test_tdo_scalar_f32_log() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_log() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z3logf(float 1.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: log of <1.0, 1.0> must fold to an all-zero vector, which is
+; printed as zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_log() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_log() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z3logDv2_f(<2 x float> <float 1.000000e+00, float 1.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: log(1.0) with a constant operand must fold to +0.0
+; (printed as 0xH0000), leaving no call behind.
+define half @test_tdo_scalar_f16_log() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_log() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z3logDh(half 1.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: log of <1.0, 1.0> must fold to an all-zero vector, which is
+; printed as zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_log() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_log() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z3logDv2_Dh(<2 x half> <half 1.000000e+00, half 1.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: log(1.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define double @test_tdo_scalar_f64_log() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_log() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z3logd(double 1.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: log of <1.0, 1.0> must fold to an all-zero vector, which
+; is printed as zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_log() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_log() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z3logDv2_d(<2 x double> <double 1.000000e+00, double 1.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z3logf(float)
+declare <2 x float>  @_Z3logDv2_f(<2 x float>)
+declare half         @_Z3logDh(half)
+declare <2 x half>   @_Z3logDv2_Dh(<2 x half>)
+declare double       @_Z3logd(double)
+declare <2 x double> @_Z3logDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-log10.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float case: log10(1.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define float @test_tdo_scalar_f32_log10() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_log10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5log10f(float 1.000000e+00)
+  ret float %c
+}
+
+; <2 x float> case: log10 of <1.0, 1.0> must fold to an all-zero vector, which
+; is printed as zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_log10() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_log10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z5log10Dv2_f(<2 x float> <float 1.000000e+00, float 1.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half case: log10(1.0) with a constant operand must fold to +0.0
+; (printed as 0xH0000), leaving no call behind.
+define half @test_tdo_scalar_f16_log10() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_log10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5log10Dh(half 1.000000e+00)
+  ret half %c
+}
+
+; <2 x half> case: log10 of <1.0, 1.0> must fold to an all-zero vector, which
+; is printed as zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_log10() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_log10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z5log10Dv2_Dh(<2 x half> <half 1.000000e+00, half 1.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double case: log10(1.0) with a constant operand must fold to the
+; constant +0.0, leaving no call behind.
+define double @test_tdo_scalar_f64_log10() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_log10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5log10d(double 1.000000e+00)
+  ret double %c
+}
+
+; <2 x double> case: log10 of <1.0, 1.0> must fold to an all-zero vector, which
+; is printed as zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_log10() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_log10() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z5log10Dv2_d(<2 x double> <double 1.000000e+00, double 1.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5log10f(float)
+declare <2 x float>  @_Z5log10Dv2_f(<2 x float>)
+declare half         @_Z5log10Dh(half)
+declare <2 x half>   @_Z5log10Dv2_Dh(<2 x half>)
+declare double       @_Z5log10d(double)
+declare <2 x double> @_Z5log10Dv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-log2.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float log2 of the constant 1.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define float @test_tdo_scalar_f32_log2() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_log2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z4log2f(float 1.000000e+00)
+  ret float %c
+}
+
+; <2 x float> log2 with both lanes equal to 1.0. Expected output: the call is
+; folded away and the function returns an all-zero vector (zeroinitializer).
+define <2 x float> @test_tdo_v2_f32_log2() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_log2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z4log2Dv2_f(<2 x float> <float 1.000000e+00, float 1.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half log2 of the constant 1.0. Expected output: the call is folded to
+; a constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_log2() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_log2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z4log2Dh(half 1.000000e+00)
+  ret half %c
+}
+
+; <2 x half> log2 with both lanes equal to 1.0. Expected output: the call is
+; folded away and the function returns an all-zero vector (zeroinitializer).
+define <2 x half> @test_tdo_v2_f16_log2() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_log2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z4log2Dv2_Dh(<2 x half> <half 1.000000e+00, half 1.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double log2 of the constant 1.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define double @test_tdo_scalar_f64_log2() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_log2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z4log2d(double 1.000000e+00)
+  ret double %c
+}
+
+; <2 x double> log2 with both lanes equal to 1.0. Expected output: the call is
+; folded away and the function returns an all-zero vector (zeroinitializer).
+define <2 x double> @test_tdo_v2_f64_log2() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_log2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z4log2Dv2_d(<2 x double> <double 1.000000e+00, double 1.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z4log2f(float)
+declare <2 x float>  @_Z4log2Dv2_f(<2 x float>)
+declare half         @_Z4log2Dh(half)
+declare <2 x half>   @_Z4log2Dv2_Dh(<2 x half>)
+declare double       @_Z4log2d(double)
+declare <2 x double> @_Z4log2Dv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-rsqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-rsqrt.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float rsqrt of the constant 1.0. Expected output: the call is folded
+; to a constant return of 1.0.
+define float @test_tdo_scalar_f32_rsqrt() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_rsqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z5rsqrtf(float 1.000000e+00)
+  ret float %c
+}
+
+; <2 x float> rsqrt with both lanes equal to 1.0. Expected output: the call is
+; folded away and the function returns a splat of 1.0.
+define <2 x float> @test_tdo_v2_f32_rsqrt() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_rsqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
+;
+entry:
+  %c = call <2 x float> @_Z5rsqrtDv2_f(<2 x float> <float 1.000000e+00, float 1.000000e+00>)
+  ret <2 x float> %c
+}
+
+; Scalar half rsqrt of the constant 1.0. Expected output: the call is folded to
+; a constant return of 0xH3C00 (half 1.0).
+define half @test_tdo_scalar_f16_rsqrt() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_rsqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z5rsqrtDh(half 1.000000e+00)
+  ret half %c
+}
+
+; <2 x half> rsqrt with both lanes equal to 1.0. Expected output: the call is
+; folded away and the function returns a splat of 0xH3C00 (half 1.0).
+define <2 x half> @test_tdo_v2_f16_rsqrt() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_rsqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> splat (half 0xH3C00)
+;
+entry:
+  %c = call <2 x half> @_Z5rsqrtDv2_Dh(<2 x half> <half 1.000000e+00, half 1.000000e+00>)
+  ret <2 x half> %c
+}
+
+; Scalar double rsqrt of the constant 1.0. Expected output: the call is folded
+; to a constant return of 1.0.
+define double @test_tdo_scalar_f64_rsqrt() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_rsqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z5rsqrtd(double 1.000000e+00)
+  ret double %c
+}
+
+; <2 x double> rsqrt with both lanes equal to 1.0. Expected output: the call is
+; folded away and the function returns a splat of 1.0.
+define <2 x double> @test_tdo_v2_f64_rsqrt() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_rsqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
+;
+entry:
+  %c = call <2 x double> @_Z5rsqrtDv2_d(<2 x double> <double 1.000000e+00, double 1.000000e+00>)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5rsqrtf(float)
+declare <2 x float>  @_Z5rsqrtDv2_f(<2 x float>)
+declare half         @_Z5rsqrtDh(half)
+declare <2 x half>   @_Z5rsqrtDv2_Dh(<2 x half>)
+declare double       @_Z5rsqrtd(double)
+declare <2 x double> @_Z5rsqrtDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sin.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float sin of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define float @test_tdo_scalar_f32_sin() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_sin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z3sinf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> sin whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_sin() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_sin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z3sinDv2_f(<2 x float> zeroinitializer)
+  ret <2 x float> %c
+}
+
+; Scalar half sin of the constant 0.0. Expected output: the call is folded to a
+; constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_sin() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_sin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z3sinDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> sin whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_sin() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_sin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z3sinDv2_Dh(<2 x half> zeroinitializer)
+  ret <2 x half> %c
+}
+
+; Scalar double sin of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define double @test_tdo_scalar_f64_sin() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_sin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z3sind(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> sin whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_sin() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_sin() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z3sinDv2_d(<2 x double> zeroinitializer)
+  ret <2 x double> %c
+}
+
+declare float        @_Z3sinf(float)
+declare <2 x float>  @_Z3sinDv2_f(<2 x float>)
+declare half         @_Z3sinDh(half)
+declare <2 x half>   @_Z3sinDv2_Dh(<2 x half>)
+declare double       @_Z3sind(double)
+declare <2 x double> @_Z3sinDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sinh.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sinh.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float sinh of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define float @test_tdo_scalar_f32_sinh() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_sinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z4sinhf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> sinh whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_sinh() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_sinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z4sinhDv2_f(<2 x float> zeroinitializer)
+  ret <2 x float> %c
+}
+
+; Scalar half sinh of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_sinh() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_sinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z4sinhDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> sinh whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_sinh() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_sinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z4sinhDv2_Dh(<2 x half> zeroinitializer)
+  ret <2 x half> %c
+}
+
+; Scalar double sinh of the constant 0.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define double @test_tdo_scalar_f64_sinh() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_sinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z4sinhd(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> sinh whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_sinh() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_sinh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z4sinhDv2_d(<2 x double> zeroinitializer)
+  ret <2 x double> %c
+}
+
+declare float        @_Z4sinhf(float)
+declare <2 x float>  @_Z4sinhDv2_f(<2 x float>)
+declare half         @_Z4sinhDh(half)
+declare <2 x half>   @_Z4sinhDv2_Dh(<2 x half>)
+declare double       @_Z4sinhd(double)
+declare <2 x double> @_Z4sinhDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sinpi.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sinpi.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float sinpi of the constant 0.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define float @test_tdo_scalar_f32_sinpi() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_sinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5sinpif(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> sinpi whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_sinpi() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_sinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z5sinpiDv2_f(<2 x float> zeroinitializer)
+  ret <2 x float> %c
+}
+
+; Scalar half sinpi of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_sinpi() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_sinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5sinpiDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> sinpi whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_sinpi() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_sinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z5sinpiDv2_Dh(<2 x half> zeroinitializer)
+  ret <2 x half> %c
+}
+
+; Scalar double sinpi of the constant 0.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define double @test_tdo_scalar_f64_sinpi() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_sinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5sinpid(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> sinpi whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_sinpi() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_sinpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z5sinpiDv2_d(<2 x double> zeroinitializer)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5sinpif(float)
+declare <2 x float>  @_Z5sinpiDv2_f(<2 x float>)
+declare half         @_Z5sinpiDh(half)
+declare <2 x half>   @_Z5sinpiDv2_Dh(<2 x half>)
+declare double       @_Z5sinpid(double)
+declare <2 x double> @_Z5sinpiDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-sqrt.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float sqrt of the constant 1.0. Expected output: the call is folded to
+; a constant return of 1.0.
+define float @test_tdo_scalar_f32_sqrt() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_sqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z4sqrtf(float 1.000000e+00)
+  ret float %c
+}
+
+; <3 x float> sqrt with distinct lanes 0.0, 1.0 and 2.0, so each lane folds to a
+; different value. Expected output: the call is folded away and the function
+; returns <0.0, 1.0, sqrt(2.0) rounded to float>.
+; NOTE(review): the name says v2 although the operand has three lanes; consider
+; renaming to v3 to match the vector width.
+define <3 x float> @test_tdo_v2_f32_sqrt() {
+; CHECK-LABEL: define <3 x float> @test_tdo_v2_f32_sqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0x3FF6A09E60000000>
+;
+entry:
+  %c = call <3 x float> @_Z4sqrtDv3_f(<3 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00>)
+  ret <3 x float> %c
+}
+
+; Scalar half sqrt of the constant 1.0. Expected output: the call is folded to
+; a constant return of 0xH3C00 (half 1.0).
+define half @test_tdo_scalar_f16_sqrt() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_sqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z4sqrtDh(half 1.000000e+00)
+  ret half %c
+}
+
+; <3 x half> sqrt with distinct lanes 0.0, 1.0 and 2.0, so each lane folds to a
+; different value. Expected output: the call is folded away and the function
+; returns <0xH0000 (0.0), 0xH3C00 (1.0), 0xH3DA8 (sqrt(2.0) rounded to half)>.
+; NOTE(review): the name says v2 although the operand has three lanes; consider
+; renaming to v3 to match the vector width.
+define <3 x half> @test_tdo_v2_f16_sqrt() {
+; CHECK-LABEL: define <3 x half> @test_tdo_v2_f16_sqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x half> <half 0xH0000, half 0xH3C00, half 0xH3DA8>
+;
+entry:
+  %c = call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> <half 0.000000e+00, half 1.000000e+00, half 2.000000e+00>)
+  ret <3 x half> %c
+}
+
+; Scalar double sqrt of the constant 1.0. Expected output: the call is folded
+; to a constant return of 1.0.
+define double @test_tdo_scalar_f64_sqrt() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_sqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z4sqrtd(double 1.000000e+00)
+  ret double %c
+}
+
+; <3 x double> sqrt with distinct lanes 0.0, 1.0 and 2.0, so each lane folds to
+; a different value. Expected output: the call is folded away and the function
+; returns <0.0, 1.0, sqrt(2.0) as a double>.
+; NOTE(review): the name says v2 although the operand has three lanes; consider
+; renaming to v3 to match the vector width.
+define <3 x double> @test_tdo_v2_f64_sqrt() {
+; CHECK-LABEL: define <3 x double> @test_tdo_v2_f64_sqrt() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <3 x double> <double 0.000000e+00, double 1.000000e+00, double 0x3FF6A09E667F3BCD>
+;
+entry:
+  %c = call <3 x double> @_Z4sqrtDv3_d(<3 x double> <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00>)
+  ret <3 x double> %c
+}
+
+declare float        @_Z4sqrtf(float)
+declare <3 x float>  @_Z4sqrtDv3_f(<3 x float>)
+declare half         @_Z4sqrtDh(half)
+declare <3 x half>   @_Z4sqrtDv3_Dh(<3 x half>)
+declare double       @_Z4sqrtd(double)
+declare <3 x double> @_Z4sqrtDv3_d(<3 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tan.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float tan of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define float @test_tdo_scalar_f32_tan() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_tan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z3tanf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> tan whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_tan() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_tan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z3tanDv2_f(<2 x float> zeroinitializer)
+  ret <2 x float> %c
+}
+
+; Scalar half tan of the constant 0.0. Expected output: the call is folded to a
+; constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_tan() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_tan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z3tanDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> tan whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_tan() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_tan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z3tanDv2_Dh(<2 x half> zeroinitializer)
+  ret <2 x half> %c
+}
+
+; Scalar double tan of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define double @test_tdo_scalar_f64_tan() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_tan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z3tand(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> tan whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_tan() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_tan() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z3tanDv2_d(<2 x double> zeroinitializer)
+  ret <2 x double> %c
+}
+
+declare float        @_Z3tanf(float)
+declare <2 x float>  @_Z3tanDv2_f(<2 x float>)
+declare half         @_Z3tanDh(half)
+declare <2 x half>   @_Z3tanDv2_Dh(<2 x half>)
+declare double       @_Z3tand(double)
+declare <2 x double> @_Z3tanDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tanh.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tanh.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float tanh of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0.0.
+define float @test_tdo_scalar_f32_tanh() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_tanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z4tanhf(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> tanh whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_tanh() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_tanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z4tanhDv2_f(<2 x float> zeroinitializer)
+  ret <2 x float> %c
+}
+
+; Scalar half tanh of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_tanh() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_tanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z4tanhDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> tanh whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_tanh() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_tanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z4tanhDv2_Dh(<2 x half> zeroinitializer)
+  ret <2 x half> %c
+}
+
+; Scalar double tanh of the constant 0.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define double @test_tdo_scalar_f64_tanh() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_tanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z4tanhd(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> tanh whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_tanh() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_tanh() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z4tanhDv2_d(<2 x double> zeroinitializer)
+  ret <2 x double> %c
+}
+
+declare float        @_Z4tanhf(float)
+declare <2 x float>  @_Z4tanhDv2_f(<2 x float>)
+declare half         @_Z4tanhDh(half)
+declare <2 x half>   @_Z4tanhDv2_Dh(<2 x half>)
+declare double       @_Z4tanhd(double)
+declare <2 x double> @_Z4tanhDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tanpi.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tanpi.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float tanpi of the constant 0.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define float @test_tdo_scalar_f32_tanpi() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_tanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+entry:
+  %c = call float @_Z5tanpif(float 0.000000e+00)
+  ret float %c
+}
+
+; <2 x float> tanpi whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x float> @test_tdo_v2_f32_tanpi() {
+; CHECK-LABEL: define <2 x float> @test_tdo_v2_f32_tanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x float> zeroinitializer
+;
+entry:
+  %c = call <2 x float> @_Z5tanpiDv2_f(<2 x float> zeroinitializer)
+  ret <2 x float> %c
+}
+
+; Scalar half tanpi of the constant 0.0. Expected output: the call is folded to
+; a constant return of 0xH0000 (half 0.0).
+define half @test_tdo_scalar_f16_tanpi() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_tanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH0000
+;
+entry:
+  %c = call half @_Z5tanpiDh(half 0.000000e+00)
+  ret half %c
+}
+
+; <2 x half> tanpi whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x half> @test_tdo_v2_f16_tanpi() {
+; CHECK-LABEL: define <2 x half> @test_tdo_v2_f16_tanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x half> zeroinitializer
+;
+entry:
+  %c = call <2 x half> @_Z5tanpiDv2_Dh(<2 x half> zeroinitializer)
+  ret <2 x half> %c
+}
+
+; Scalar double tanpi of the constant 0.0. Expected output: the call is folded
+; to a constant return of 0.0.
+define double @test_tdo_scalar_f64_tanpi() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_tanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+entry:
+  %c = call double @_Z5tanpid(double 0.000000e+00)
+  ret double %c
+}
+
+; <2 x double> tanpi whose operand is written as zeroinitializer rather than an
+; explicit element list. Expected output: the call is folded away and the
+; function returns zeroinitializer.
+define <2 x double> @test_tdo_v2_f64_tanpi() {
+; CHECK-LABEL: define <2 x double> @test_tdo_v2_f64_tanpi() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <2 x double> zeroinitializer
+;
+entry:
+  %c = call <2 x double> @_Z5tanpiDv2_d(<2 x double> zeroinitializer)
+  ret <2 x double> %c
+}
+
+declare float        @_Z5tanpif(float)
+declare <2 x float>  @_Z5tanpiDv2_f(<2 x float>)
+declare half         @_Z5tanpiDh(half)
+declare <2 x half>   @_Z5tanpiDv2_Dh(<2 x half>)
+declare double       @_Z5tanpid(double)
+declare <2 x double> @_Z5tanpiDv2_d(<2 x double>)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tgamma.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-tdo-tgamma.ll
@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Scalar float tgamma of the constant 1.0. Expected output: the call is folded
+; to a constant return of 1.0.
+define float @test_tdo_scalar_f32_tgamma() {
+; CHECK-LABEL: define float @test_tdo_scalar_f32_tgamma() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %c = call float @_Z6tgammaf(float 1.000000e+00)
+  ret float %c
+}
+
+; <4 x float> tgamma with distinct lanes 1.0, 2.0, 3.0 and 4.0, so each lane is
+; folded separately. Expected output: the call is folded away and the function
+; returns <1.0, 1.0, 2.0, 6.0>.
+; NOTE(review): the name says v2 although the operand has four lanes; consider
+; renaming to v4 to match the vector width.
+define <4 x float> @test_tdo_v2_f32_tgamma() {
+; CHECK-LABEL: define <4 x float> @test_tdo_v2_f32_tgamma() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 6.000000e+00>
+;
+entry:
+  %c = call <4 x float> @_Z6tgammaDv4_f(<4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>)
+  ret <4 x float> %c
+}
+
+; Scalar half tgamma of the constant 1.0. Expected output: the call is folded
+; to a constant return of 0xH3C00 (half 1.0).
+define half @test_tdo_scalar_f16_tgamma() {
+; CHECK-LABEL: define half @test_tdo_scalar_f16_tgamma() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret half 0xH3C00
+;
+entry:
+  %c = call half @_Z6tgammaDh(half 1.000000e+00)
+  ret half %c
+}
+
+; <4 x half> tgamma with distinct lanes 1.0, 2.0, 3.0 and 4.0, so each lane is
+; folded separately. Expected output: the call is folded away and the function
+; returns <0xH3C00 (1.0), 0xH3C00 (1.0), 0xH4000 (2.0), 0xH4600 (6.0)>.
+; NOTE(review): the name says v2 although the operand has four lanes; consider
+; renaming to v4 to match the vector width.
+define <4 x half> @test_tdo_v2_f16_tgamma() {
+; CHECK-LABEL: define <4 x half> @test_tdo_v2_f16_tgamma() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH4000, half 0xH4600>
+;
+entry:
+  %c = call <4 x half> @_Z6tgammaDv4_Dh(<4 x half> <half 1.000000e+00, half 2.000000e+00, half 3.000000e+00, half 4.000000e+00>)
+  ret <4 x half> %c
+}
+
+; Scalar double tgamma of the constant 1.0. Expected output: the call is folded
+; to a constant return of 1.0.
+define double @test_tdo_scalar_f64_tgamma() {
+; CHECK-LABEL: define double @test_tdo_scalar_f64_tgamma() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %c = call double @_Z6tgammad(double 1.000000e+00)
+  ret double %c
+}
+
+; <4 x double> tgamma with distinct lanes 1.0, 2.0, 3.0 and 4.0, so each lane
+; is folded separately. Expected output: the call is folded away and the
+; function returns <1.0, 1.0, 2.0, 6.0>.
+; NOTE(review): the name says v2 although the operand has four lanes; consider
+; renaming to v4 to match the vector width.
+define <4 x double> @test_tdo_v2_f64_tgamma() {
+; CHECK-LABEL: define <4 x double> @test_tdo_v2_f64_tgamma() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret <4 x double> <double 1.000000e+00, double 1.000000e+00, double 2.000000e+00, double 6.000000e+00>
+;
+entry:
+  %c = call <4 x double> @_Z6tgammaDv4_d(<4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>)
+  ret <4 x double> %c
+}
+
+declare float        @_Z6tgammaf(float)
+declare <4 x float>  @_Z6tgammaDv4_f(<4 x float>)
+declare half         @_Z6tgammaDh(half)
+declare <4 x half>   @_Z6tgammaDv4_Dh(<4 x half>)
+declare double       @_Z6tgammad(double)
+declare <4 x double> @_Z6tgammaDv4_d(<4 x double>)