AMDGPU: Strip sign bit operations on llvm.amdgcn.trig.preop uses (#179712)

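The instruction ignores the sign bit of its source operand, so we can look through operations that only change the sign (fneg, fabs, copysign) and use the underlying magnitude source directly. The real library use has a fabs on the input, which this makes unnecessary. stripSignOnlyFPOps should probably go directly into PatternMatch in some form.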
2026-02-19 18:44:33 +01:00 · 2026-02-19 18:44:33 +01:00 · bcfff580f4
commit bcfff580f4
parent 680b56aa4b
4 changed files with 80 additions and 9 deletions
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@ -331,6 +331,17 @@ public:
    return ConstantVector::get(Out);
  }

+  /// Return the magnitude source of \p Val, peeling at most one fneg, then one
+  /// fabs, then one copysign (in that order). Returns \p Val if none match.
+  static Value *stripSignOnlyFPOps(Value *Val) {
+    using namespace llvm::PatternMatch;
+
+    match(Val, m_FNeg(m_Value(Val)));
+    match(Val, m_FAbs(m_Value(Val)));
+    match(Val, m_CopySign(m_Value(Val), m_Value()));
+    return Val;
+  }
+
  void addToWorklist(Instruction *I) { Worklist.push(I); }

  AssumptionCache &getAssumptionCache() const { return AC; }
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@ -1545,6 +1545,11 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    if (isa<UndefValue>(Segment))
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));

+    // Sign bit is not used.
+    Value *StrippedSign = InstCombiner::stripSignOnlyFPOps(Src);
+    if (StrippedSign != Src)
+      return IC.replaceOperand(II, 0, StrippedSign);
+
    if (II.isStrictFP())
      break;

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@ -1409,15 +1409,6 @@ Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
  return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
 }

-/// Ignore all operations which only change the sign of a value, returning the
-/// underlying magnitude value.
-static Value *stripSignOnlyFPOps(Value *Val) {
-  match(Val, m_FNeg(m_Value(Val)));
-  match(Val, m_FAbs(m_Value(Val)));
-  match(Val, m_CopySign(m_Value(Val), m_Value()));
-  return Val;
-}
-
 /// Matches canonical form of isnan, fcmp ord x, 0
 static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) {
  return P == FCmpInst::FCMP_ORD && match(RHS, m_AnyZeroFP());
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@ -5980,6 +5980,70 @@ define double @trig_preop_constfold_exponent1968_mantissaX__outbound_segment() {
  ret double %val
 }

+define double @trig_preop_strip_fabs(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fabs(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fabs_multi_use(double %val, i32 %idx, ptr %ptr) {
+; CHECK-LABEL: @trig_preop_strip_fabs_multi_use(
+; CHECK-NEXT:    [[FABS:%.*]] = call double @llvm.fabs.f64(double [[VAL:%.*]])
+; CHECK-NEXT:    store double [[FABS]], ptr [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  store double %fabs, ptr %ptr
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fneg(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fneg(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fneg = fneg double %val
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fneg, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fneg_fabs(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fneg_fabs(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %fneg.fabs = fneg double %fabs
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fneg.fabs, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_copysign(double %mag, double %sign, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_copysign(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[MAG:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %copysign = call double @llvm.copysign.f64(double %mag, double %sign)
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %copysign, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fabs_strictfp(double %val, i32 %idx) strictfp {
+; CHECK-LABEL: @trig_preop_strip_fabs_strictfp(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]]) #[[ATTR20]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx) strictfp
+  ret double %result
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.log
 ; --------------------------------------------------------------------