Sergey Kachkov 1f2a634c44 Reland "[LSR] Do not create duplicated PHI nodes while preserving LCSSA form" (#107380)
Motivating example: https://godbolt.org/z/eb97zrxhx
Here we have 2 induction variables in the loop: one is corresponding to
i variable (add rdx, 4), the other - to res (add rax, 2). The second
induction variable can be removed by rewriteLoopExitValues() method
(final value of res at loop exit is unroll_iter * -2); however, this
doesn't happen because we have duplicated LCSSA phi nodes at loop exit:
```
; Preheader:
for.body.preheader.new:                           ; preds = %for.body.preheader
  %unroll_iter = and i64 %N, -4
  br label %for.body

; Loop:
for.body:                                         ; preds = %for.body, %for.body.preheader.new
  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.preheader.new ]
  %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
  %inc.3 = add nuw i64 %i.07, 4
  %lsr.iv.next = add nsw i64 %lsr.iv, -2
  %niter.ncmp.3.not = icmp eq i64 %unroll_iter, %inc.3
  br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop !7

; Exit blocks
for.end.loopexit.unr-lcssa.loopexit:              ; preds = %for.body
  %inc.3.lcssa = phi i64 [ %inc.3, %for.body ]
  %lsr.iv.next.lcssa11 = phi i64 [ %lsr.iv.next, %for.body ]
  %lsr.iv.next.lcssa = phi i64 [ %lsr.iv.next, %for.body ]
  br label %for.end.loopexit.unr-lcssa
```
rewriteLoopExitValues requires %lsr.iv.next value to have only 2 uses:
one in LCSSA phi node, the other - in induction phi node. Here we have 3
uses of this value because of duplicated lcssa nodes, so the transform
doesn't apply and leads to an extra add operation inside the loop. The
proposed solution is to accumulate inserted instructions that will
require LCSSA form update into SetVector and then call
formLCSSAForInstructions for this SetVector once, so the same
instructions don't process twice.

Reland fixes the issue with preserve-lcssa.ll test: it fails in the situation
when x86_64-unknown-linux-gnu target is unavailable in opt. The changes are
moved into separate duplicated-phis.ll test with explicit x86 target requirement
to fix bots which are not building this target.
2024-09-09 16:14:51 +03:00

87 lines
4.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -loop-reduce %s | FileCheck %s
; REQUIRES: x86-registered-target
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i64 @test_duplicated_phis(i64 noundef %N) {
; CHECK-LABEL: define i64 @test_duplicated_phis(
; CHECK-SAME: i64 noundef [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[N]], 1
; CHECK-NEXT: [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0
; CHECK-NEXT: br i1 [[CMP6_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY_PREHEADER:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4
; CHECK-NEXT: br i1 [[TMP0]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UNROLL_ITER]], -4
; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP5]], 1
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = sub i64 -3, [[TMP3]]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[INC_3]] = add i64 [[I_07]], 4
; CHECK-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
; CHECK-NEXT: br i1 [[NITER_NCMP_3_NOT]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], 1
; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
; CHECK-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, %[[FOR_BODY_PREHEADER]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, %[[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[N]], 1
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP2]], 0
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT]], %[[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: ret i64 [[RES_0_LCSSA]]
;
entry:
%mul = shl i64 %N, 1
%cmp6.not = icmp eq i64 %mul, 0
br i1 %cmp6.not, label %for.end, label %for.body.preheader
for.body.preheader:
%0 = icmp ult i64 %mul, 4
br i1 %0, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new
for.body.preheader.new:
%unroll_iter = and i64 %mul, -4
br label %for.body
for.body:
%res.09 = phi i64 [ 0, %for.body.preheader.new ], [ %res.1.3, %for.body ]
%i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
%niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.3, %for.body ]
%res.1.1 = add i64 %res.09, -1
%inc.1 = or disjoint i64 %i.07, 2
%res.1.2 = add i64 %inc.1, %res.1.1
%reass.sub = sub i64 %res.1.2, %i.07
%res.1.3 = add i64 %reass.sub, -3
%inc.3 = add nuw i64 %i.07, 4
%niter.next.3 = add i64 %niter, 4
%niter.ncmp.3.not = icmp eq i64 %niter.next.3, %unroll_iter
br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body
for.end.loopexit.unr-lcssa.loopexit:
%1 = add i64 %reass.sub, -4
br label %for.end.loopexit.unr-lcssa
for.end.loopexit.unr-lcssa:
%res.1.lcssa.ph = phi i64 [ undef, %for.body.preheader ], [ %res.1.3, %for.end.loopexit.unr-lcssa.loopexit ]
%res.09.unr = phi i64 [ -1, %for.body.preheader ], [ %1, %for.end.loopexit.unr-lcssa.loopexit ]
%2 = and i64 %N, 1
%lcmp.mod.not = icmp eq i64 %2, 0
%spec.select = select i1 %lcmp.mod.not, i64 %res.1.lcssa.ph, i64 %res.09.unr
br label %for.end
for.end:
%res.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.end.loopexit.unr-lcssa ]
ret i64 %res.0.lcssa
}