
Many profitable optimizations cannot be performed at -Oz because loops are left unrotated. While rotating a loop is (minimally) worse for size on its own, many of the optimizations that rotation enables significantly reduce code size, such as memcpy optimizations and other patterns found by loop idiom recognition. Related discussion can be found in issue #50308. This patch adds an experimental, backend-only flag to allow loop header duplication, regardless of the optimization level. Downstream consumers can experiment with this flag, and if it is profitable, we can adjust the compiler's defaults accordingly and expose any useful frontend flags to opt into the new behavior.
58 lines
2.6 KiB
LLVM
58 lines
2.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3

;; Check that -enable-loop-header-duplication at Oz enables certain types of
;; optimizations, for example replacing the loop body with a call to memset. If
;; loop idiom recognition begins to recognize unrotated loops, this test will
;; need to be updated.

; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION
; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION

;; Input: a byte-fill loop in unrotated form (the exit test is in the loop
;; header). Each iteration stores the constant 1 through %ptr.iv and steps the
;; pointer forward by one byte, until the pointer compares equal to %end.
;;
;; Expected output, per the autogenerated assertions below:
;;  * NOROTATION prefix - the header/latch loop and its byte store survive.
;;  * ROTATION prefix   - the loop is replaced by a guard on %start == %end
;;    followed by one llvm.memset call covering (%end - %start) bytes.
define void @test(ptr noalias nonnull align 1 %start, ptr %end) unnamed_addr {
; NOROTATION-LABEL: define void @test(
; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
; NOROTATION-NEXT:  entry:
; NOROTATION-NEXT:    br label [[LOOP_HEADER:%.*]]
; NOROTATION:       loop.header:
; NOROTATION-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; NOROTATION-NEXT:    [[_12_I:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
; NOROTATION-NEXT:    br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; NOROTATION:       loop.latch:
; NOROTATION-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
; NOROTATION-NEXT:    store i8 1, ptr [[PTR_IV]], align 1
; NOROTATION-NEXT:    br label [[LOOP_HEADER]]
; NOROTATION:       exit:
; NOROTATION-NEXT:    ret void
;
; ROTATION-LABEL: define void @test(
; ROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
; ROTATION-NEXT:  entry:
; ROTATION-NEXT:    [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
; ROTATION-NEXT:    br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]
; ROTATION:       loop.latch.preheader:
; ROTATION-NEXT:    [[END3:%.*]] = ptrtoint ptr [[END]] to i64
; ROTATION-NEXT:    [[START4:%.*]] = ptrtoint ptr [[START]] to i64
; ROTATION-NEXT:    [[TMP0:%.*]] = sub i64 [[END3]], [[START4]]
; ROTATION-NEXT:    tail call void @llvm.memset.p0.i64(ptr nonnull align 1 [[START]], i8 1, i64 [[TMP0]], i1 false)
; ROTATION-NEXT:    br label [[EXIT]]
; ROTATION:       exit:
; ROTATION-NEXT:    ret void
;
entry:
  br label %loop.header

;; The exit comparison is evaluated here, before the body runs.
loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %_12.i = icmp eq ptr %ptr.iv, %end
  br i1 %_12.i, label %exit, label %loop.latch

;; Loop body: compute the next address and store one byte at the current one.
loop.latch:
  %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 1
  store i8 1, ptr %ptr.iv, align 1
  br label %loop.header

exit:
  ret void
}
|