[VPlan] Preserve trunc nuw/nsw in VPRecipeWithIRFlags (#144700)

This preserves the nuw/nsw flags on widened truncs by checking for
TruncInst in the VPIRFlags constructor.

The motivation for this is to be able to fold away some redundant truncs
feeding into uitofps (or potentially narrow the inductions feeding them).
This commit is contained in:
Luke Lau 2025-07-15 15:34:14 +08:00 committed by GitHub
parent b0769aa290
commit c8d0e24745
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 49 additions and 11 deletions

View File

@ -597,6 +597,7 @@ class VPIRFlags {
enum class OperationType : unsigned char {
Cmp,
OverflowingBinOp,
Trunc,
DisjointOp,
PossiblyExactOp,
GEPOp,
@ -613,6 +614,13 @@ public:
WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
};
/// No-unsigned-wrap / no-signed-wrap flags kept for recipes whose operation
/// type is OperationType::Trunc. Same layout as WrapFlagsTy: two 1-bit fields.
struct TruncFlagsTy {
char HasNUW : 1;
char HasNSW : 1;
TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
};
struct DisjointFlagsTy {
char IsDisjoint : 1;
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
@ -644,6 +652,7 @@ private:
union {
CmpInst::Predicate CmpPredicate;
WrapFlagsTy WrapFlags;
TruncFlagsTy TruncFlags;
DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
GEPNoWrapFlags GEPFlags;
@ -665,6 +674,9 @@ public:
} else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
OpType = OperationType::OverflowingBinOp;
WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
} else if (auto *Op = dyn_cast<TruncInst>(&I)) {
OpType = OperationType::Trunc;
TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
} else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
OpType = OperationType::PossiblyExactOp;
ExactFlags.IsExact = Op->isExact();
@ -715,6 +727,10 @@ public:
WrapFlags.HasNUW = false;
WrapFlags.HasNSW = false;
break;
case OperationType::Trunc:
TruncFlags.HasNUW = false;
TruncFlags.HasNSW = false;
break;
case OperationType::DisjointOp:
DisjointFlags.IsDisjoint = false;
break;
@ -744,6 +760,10 @@ public:
I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
I.setHasNoSignedWrap(WrapFlags.HasNSW);
break;
case OperationType::Trunc:
I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
I.setHasNoSignedWrap(TruncFlags.HasNSW);
break;
case OperationType::DisjointOp:
cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
break;
@ -800,15 +820,25 @@ public:
}
bool hasNoUnsignedWrap() const {
assert(OpType == OperationType::OverflowingBinOp &&
"recipe doesn't have a NUW flag");
return WrapFlags.HasNUW;
switch (OpType) {
case OperationType::OverflowingBinOp:
return WrapFlags.HasNUW;
case OperationType::Trunc:
return TruncFlags.HasNUW;
default:
llvm_unreachable("recipe doesn't have a NUW flag");
}
}
bool hasNoSignedWrap() const {
assert(OpType == OperationType::OverflowingBinOp &&
"recipe doesn't have a NSW flag");
return WrapFlags.HasNSW;
switch (OpType) {
case OperationType::OverflowingBinOp:
return WrapFlags.HasNSW;
case OperationType::Trunc:
return TruncFlags.HasNSW;
default:
llvm_unreachable("recipe doesn't have a NSW flag");
}
}
bool isDisjoint() const {

View File

@ -1763,6 +1763,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
Opcode == Instruction::Mul ||
Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
case OperationType::Trunc:
return Opcode == Instruction::Trunc;
case OperationType::DisjointOp:
return Opcode == Instruction::Or;
case OperationType::PossiblyExactOp:
@ -1810,6 +1812,12 @@ void VPIRFlags::printFlags(raw_ostream &O) const {
if (WrapFlags.HasNSW)
O << " nsw";
break;
case OperationType::Trunc:
if (TruncFlags.HasNUW)
O << " nuw";
if (TruncFlags.HasNSW)
O << " nsw";
break;
case OperationType::FPMathOp:
getFastMathFlags().print(O);
break;

View File

@ -1484,7 +1484,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1)
; DEFAULT-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@ -1521,7 +1521,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20)
; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
; PRED-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
; PRED-NEXT: [[TMP2:%.*]] = trunc nuw nsw <4 x i64> [[TMP1]] to <4 x i32>
; PRED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; PRED-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" --version 5
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 5
; RUN: opt -mattr=+mve -passes=loop-vectorize < %s -S -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@ -49,7 +49,7 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <4 x i32> [[TMP9]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <4 x i32> [[TMP10]], [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = lshr <4 x i32> [[TMP11]], splat (i32 16)
; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8>
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i32> [[TMP12]] to <4 x i8>
; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], splat (i32 32767)
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw <4 x i32> [[TMP5]], splat (i32 16762097)
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 16759568)

View File

@ -41,7 +41,7 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <8 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], splat (i32 15)
; CHECK-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> splat (i32 32767))
; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = trunc nsw <8 x i32> [[TMP7]] to <8 x i16>
; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr [[NEXT_GEP14]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]