llvm-project/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv-not-triggered.ll
Ryotaro Kasuga 2330fd2f73
[LoopPeel] Add new option to peeling loops to convert PHI into IV (#121104)
LoopPeel currently considers PHI nodes that become loop invariants
through peeling. However, in some cases, peeling transforms PHI nodes
into induction variables (IVs), potentially enabling further
optimizations such as loop vectorization. For example:

```c
// TSVC s292
int im = N-1;
for (int i=0; i<N; i++) {
  a[i] = b[i] + b[im];
  im = i;
}
```

In this case, peeling one iteration converts `im` into an IV, allowing
it to be handled by the loop vectorizer.

This patch adds a new feature to peel loops when to convert PHIs into
IVs. At the moment this feature is disabled by default.

Enabling it allows to vectorize the above example. I have measured on
neoverse-v2 and observed a speedup of more than 60% (options: `-O3
-ffast-math -mcpu=neoverse-v2 -mllvm -enable-peeling-for-iv`).

This PR is taken over from #94900
Related #81851
2025-08-20 13:44:56 +00:00

151 lines
6.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -passes=loop-unroll -enable-peeling-for-iv | FileCheck %s
; RUN: opt < %s -S -passes=loop-unroll-full -enable-peeling-for-iv | FileCheck %s
; Check that unnecessary peeling doesn't occur if for a comparison instruction
; between two instructions. The original code is as below. Both i and j are
; inductions, but the comparison i < j is not an induction.
;
; val = 42;
; for (i=0,j=100; i<10000; i+=2,j+=1) {
; a[i] = val;
; val = i < j;
; }
;
define void @dont_peel_cmp_ind_ind(ptr %a) {
; CHECK-LABEL: define void @dont_peel_cmp_ind_ind(
; CHECK-SAME: ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 100, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i32 [[I]]
; CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[VAL_NEXT_CMP:%.*]] = icmp slt i32 [[I]], [[J]]
; CHECK-NEXT: [[VAL_NEXT]] = zext i1 [[VAL_NEXT_CMP]] to i32
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 2
; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_NEXT]], 10000
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_NEXT]], 10000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
%j = phi i32 [ 100, %entry ] , [ %j.next, %for.body ]
%val = phi i32 [ 42, %entry ], [ %val.next, %for.body ]
%arrayidx = getelementptr inbounds nuw i32, ptr %a, i32 %i
store i32 10, ptr %arrayidx, align 4
%val.next.cmp = icmp slt i32 %i, %j
%val.next = zext i1 %val.next.cmp to i32
%i.next = add i32 %i, 2
%j.next = add i32 %j, 1
%cmp = icmp ne i32 %i.next, 10000
%exitcond = icmp slt i32 %i.next, 10000
br i1 %exitcond, label %for.body, label %exit
exit:
ret void
}
; Check that unnecessary peeling doesn't occur if for a bitwise instructions
; between IVs. The original code is as below. The variable i is an induction,
; but vals (val0 through val4) are not.
;
; val0 = 42;
; val1 = 42;
; val2 = 42;
; val3 = 42;
; val4 = 42;
; for (i=0,j=100; i<10000; i+=2,j+=1) {
; a[i] = val0;
; b[i] = val1;
; c[i] = val2;
; d[i] = val3;
; e[i] = val4;
; val0 = i & j;
; val1 = i | j;
; val2 = i ^ j;
; val3 = i >> j;
; val4 = i << j;
; }
;
define void @dont_peel_bitwise_op_iv_iv(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) {
; CHECK-LABEL: define void @dont_peel_bitwise_op_iv_iv(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 100, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL0:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL0_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL1_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL2:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL2_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL3:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL3_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL4:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL4_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDX_0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i32 [[I]]
; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i32 [[I]]
; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i32 [[I]]
; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds nuw i32, ptr [[D]], i32 [[I]]
; CHECK-NEXT: [[IDX_4:%.*]] = getelementptr inbounds nuw i32, ptr [[E]], i32 [[I]]
; CHECK-NEXT: store i32 [[VAL0]], ptr [[IDX_0]], align 4
; CHECK-NEXT: store i32 [[VAL1]], ptr [[IDX_1]], align 4
; CHECK-NEXT: store i32 [[VAL2]], ptr [[IDX_2]], align 4
; CHECK-NEXT: store i32 [[VAL3]], ptr [[IDX_3]], align 4
; CHECK-NEXT: store i32 [[VAL4]], ptr [[IDX_4]], align 4
; CHECK-NEXT: [[VAL0_NEXT]] = and i32 [[I]], [[J]]
; CHECK-NEXT: [[VAL1_NEXT]] = or i32 [[I]], [[J]]
; CHECK-NEXT: [[VAL2_NEXT]] = xor i32 [[I]], [[J]]
; CHECK-NEXT: [[VAL3_NEXT]] = shl i32 [[I]], [[J]]
; CHECK-NEXT: [[VAL4_NEXT]] = lshr i32 [[I]], [[J]]
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 2
; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_NEXT]], 10000
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_NEXT]], 10000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
%j = phi i32 [ 100, %entry ] , [ %j.next, %for.body ]
%val0 = phi i32 [ 42, %entry ], [ %val0.next, %for.body ]
%val1 = phi i32 [ 42, %entry ], [ %val1.next, %for.body ]
%val2 = phi i32 [ 42, %entry ], [ %val2.next, %for.body ]
%val3 = phi i32 [ 42, %entry ], [ %val3.next, %for.body ]
%val4 = phi i32 [ 42, %entry ], [ %val4.next, %for.body ]
%idx.0 = getelementptr inbounds nuw i32, ptr %a, i32 %i
%idx.1 = getelementptr inbounds nuw i32, ptr %b, i32 %i
%idx.2 = getelementptr inbounds nuw i32, ptr %c, i32 %i
%idx.3 = getelementptr inbounds nuw i32, ptr %d, i32 %i
%idx.4 = getelementptr inbounds nuw i32, ptr %e, i32 %i
store i32 %val0, ptr %idx.0, align 4
store i32 %val1, ptr %idx.1, align 4
store i32 %val2, ptr %idx.2, align 4
store i32 %val3, ptr %idx.3, align 4
store i32 %val4, ptr %idx.4, align 4
%val0.next = and i32 %i, %j
%val1.next = or i32 %i, %j
%val2.next = xor i32 %i, %j
%val3.next = shl i32 %i, %j
%val4.next = lshr i32 %i, %j
%i.next = add i32 %i, 2
%j.next = add i32 %j, 1
%cmp = icmp ne i32 %i.next, 10000
%exitcond = icmp slt i32 %i.next, 10000
br i1 %exitcond, label %for.body, label %exit
exit:
ret void
}