Alex Bradbury 3877039fd1
[LoopIdiom] Select llvm.experimental.memset.pattern intrinsic rather than memset_pattern16 libcall (#126736)
In order to keep the change as incremental as possible, this only
introduces the memset.pattern intrinsic in cases where memset_pattern16
would have been used. Future patches can enable it on targets that don't
have the libcall, and select it in cases where the libcall isn't
directly usable. As the memset.pattern intrinsic takes the number of
times to store the pattern as an argument, unlike memset_pattern16 which
takes the number of bytes to write, we no longer try to form an i128
pattern.
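
To make the difference in argument semantics concrete, here is a hand-written
sketch (not taken from this patch's tests; `%p`, `%n`, and the pattern global
are illustrative names):

```
; Pre-patch form on Apple targets: memset_pattern16 takes a pointer to a
; 16-byte pattern constant and a *byte* count.
;   call void @memset_pattern16(ptr %p, ptr @.memset_pattern, i64 %nbytes)

; With this patch, the intrinsic takes the pattern value directly plus the
; number of *times* the pattern is stored, rather than a byte count.
call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 %p, i32 1, i64 %n, i1 false)
```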

Special care is taken for cases where multiple stores in the same loop
iteration were combined to form a single pattern. For such cases, we
inherit the existing limitation that only loops like the following are
supported:

```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 2;
}
```

But the following doesn't result in a memset.pattern (even though it
could, by forming an appropriate pattern):
```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 3;
}
```

Addressing this existing deficiency is left for a follow-up due to a
desire not to change too much at once (i.e. the goal here is equivalence
with the current codegen); a sketch of the IR emitted for the supported
case is given below.
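
As a rough illustration (again a hand-written sketch, not lifted from the test
file), the supported case could produce something like the following, assuming
`f` is an array of `i32` and the two stores of 2 per iteration are merged into
a single i64 pattern (`%f` and `%n` are illustrative names):

```
; Two i32 stores of 2 per iteration become one i64 pattern stored %n times
; (8589934594 == 0x0000000200000002, i.e. two adjacent i32 values of 2).
call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 4 %f, i64 8589934594, i64 %n, i1 false)
```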

A command line option is introduced to force selection of the intrinsic
even in cases where it otherwise wouldn't be (i.e. in cases where the
libcall wouldn't have been selected). This is intended as a transitional
option for testing and experimentation, to be removed at a later point.

The only platforms this should impact are those that have the memset_pattern16 libcall (i.e. Apple platforms). The testing performed to confirm there are no unexpected codegen changes is described at https://github.com/llvm/llvm-project/pull/126736#issuecomment-3005097468
2025-07-09 13:48:15 +01:00


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
;.
; CHECK: @G = global i32 5
; CHECK: @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
;.
define void @test1(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
store i8 0, ptr %I.0.014, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Make sure memset is formed for stores larger than 1 byte, and that the
; alignment of the store is preserved
define void @test1_i16(ptr align 2 %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1_i16(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[BASE:%.*]], i8 0, i64 [[TMP0]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i16, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i16, ptr %Base, i64 %indvar
store i16 0, ptr %I.0.014, align 2
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
define void @test1a(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1a(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br label [[FOR_BODY_CONT]]
; CHECK: for.body.cont:
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
store i8 0, ptr %I.0.014, align 1
%indvar.next = add i64 %indvar, 1
br label %for.body.cont
for.body.cont:
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
define void @test2(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP10:%.*]] = icmp eq i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE]], 2
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[BASE:%.*]], i8 1, i64 [[TMP0]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[I_011]]
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_011]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%cmp10 = icmp eq i64 %Size, 0
br i1 %cmp10, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%add.ptr.i = getelementptr i32, ptr %Base, i64 %i.011
store i32 16843009, ptr %add.ptr.i, align 4
%inc = add nsw i64 %i.011, 1
%exitcond = icmp eq i64 %inc, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; This is a case where there is an extra may-aliased store in the loop, so we
; can't promote the memset.
define void @test3(ptr %Base, i64 %Size, ptr %MayAlias) nounwind ssp {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[I_011]]
; CHECK-NEXT: store i32 16843009, ptr [[ADD_PTR_I]], align 4
; CHECK-NEXT: store i8 42, ptr [[MAYALIAS:%.*]], align 1
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_011]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%add.ptr.i = getelementptr i32, ptr %Base, i64 %i.011
store i32 16843009, ptr %add.ptr.i, align 4
store i8 42, ptr %MayAlias
%inc = add nsw i64 %i.011, 1
%exitcond = icmp eq i64 %inc, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %entry
ret void
}
; Make sure the first store in the loop is turned into a memset.
define void @test4(ptr %Base) nounwind ssp {
; CHECK-LABEL: @test4(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[BASE100:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 1000
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[BASE]], i8 0, i64 100, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: store i8 42, ptr [[BASE100]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
%Base100 = getelementptr i8, ptr %Base, i64 1000
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
store i8 0, ptr %I.0.014, align 1
;; Store beyond the range being memset; should be safe to promote.
store i8 42, ptr %Base100
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 100
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; This can't be promoted: the store is of a loop-variant value.
define void @test5(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test5(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = trunc i64 [[INDVAR]] to i8
; CHECK-NEXT: store i8 [[V]], ptr [[I_0_014]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
%V = trunc i64 %indvar to i8
store i8 %V, ptr %I.0.014, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; memcpy formation
define void @test6(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[BASE:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[I_0_014]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%Base = alloca i8, i32 10000
%Dest = alloca i8, i32 10000
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
%DestI = getelementptr i8, ptr %Dest, i64 %indvar
%V = load i8, ptr %I.0.014, align 1
store i8 %V, ptr %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; memcpy formation, check alignment
define void @test6_dest_align(ptr noalias align 1 %Base, ptr noalias align 4 %Dest, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6_dest_align(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST:%.*]], ptr align 1 [[BASE:%.*]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[I_0_014]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
%DestI = getelementptr i32, ptr %Dest, i64 %indvar
%V = load i32, ptr %I.0.014, align 1
store i32 %V, ptr %DestI, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; memcpy formation, check alignment
define void @test6_src_align(ptr noalias align 4 %Base, ptr noalias align 1 %Dest, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6_src_align(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST:%.*]], ptr align 4 [[BASE:%.*]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[I_0_014]], align 4
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
%DestI = getelementptr i32, ptr %Dest, i64 %indvar
%V = load i32, ptr %I.0.014, align 4
store i32 %V, ptr %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
define void @test7(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test7(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY_CONT]]
; CHECK: for.body.cont:
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
br label %for.body.cont
for.body.cont:
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
store i8 0, ptr %I.0.014, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; This loop should not be transformed; it only executes one iteration.
define void @test8(ptr %Ptr, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test8(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[PI:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[INDVAR]]
; CHECK-NEXT: store i64 0, ptr [[PI]], align 8
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%PI = getelementptr i64, ptr %Ptr, i64 %indvar
store i64 0, ptr %PI
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 1
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
declare ptr @external(ptr)
;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
define void @test9(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test9(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[BASE:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT: [[BASEALIAS:%.*]] = call ptr @external(ptr [[BASE]])
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[I_0_014]], align 1
; CHECK-NEXT: store i8 [[V]], ptr [[DESTI]], align 1
; CHECK-NEXT: store i8 4, ptr [[BASEALIAS]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%Base = alloca i8, i32 10000
%Dest = alloca i8, i32 10000
%BaseAlias = call ptr @external(ptr %Base)
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
%DestI = getelementptr i8, ptr %Dest, i64 %indvar
%V = load i8, ptr %I.0.014, align 1
store i8 %V, ptr %DestI, align 1
;; This store can clobber the input.
store i8 4, ptr %BaseAlias
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; A two-dimensional nested loop should be promoted to one big memset.
define void @test10(ptr %X) nounwind ssp {
; CHECK-LABEL: @test10(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
; CHECK-NEXT: br label [[BB_NPH:%.*]]
; CHECK: bb.nph:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC10:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC12:%.*]], [[FOR_INC10]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw i64 [[INDVAR]], 100
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT: br label [[FOR_BODY5:%.*]]
; CHECK: for.body5:
; CHECK-NEXT: [[J_02:%.*]] = phi i32 [ 0, [[BB_NPH]] ], [ [[INC:%.*]], [[FOR_BODY5]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_04]], 100
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_02]], [[MUL]]
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[IDXPROM]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[J_02]], 1
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[INC]], 100
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_INC10]], label [[FOR_BODY5]]
; CHECK: for.inc10:
; CHECK-NEXT: [[INC12]] = add nsw i32 [[I_04]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC12]], 100
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END13:%.*]], label [[BB_NPH]]
; CHECK: for.end13:
; CHECK-NEXT: ret void
;
entry:
br label %bb.nph
bb.nph: ; preds = %entry, %for.inc10
%i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
br label %for.body5
for.body5: ; preds = %for.body5, %bb.nph
%j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
%mul = mul nsw i32 %i.04, 100
%add = add nsw i32 %j.02, %mul
%idxprom = sext i32 %add to i64
%arrayidx = getelementptr inbounds i8, ptr %X, i64 %idxprom
store i8 0, ptr %arrayidx, align 1
%inc = add nsw i32 %j.02, 1
%cmp4 = icmp eq i32 %inc, 100
br i1 %cmp4, label %for.inc10, label %for.body5
for.inc10: ; preds = %for.body5
%inc12 = add nsw i32 %i.04, 1
%cmp = icmp eq i32 %inc12, 100
br i1 %cmp, label %for.end13, label %bb.nph
for.end13: ; preds = %for.inc10
ret void
}
; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset.pattern intrinsic call.
; rdar://9009151
define void @test11_pattern(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[P:%.*]], i32 1, i64 10000, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
%arrayidx = getelementptr i32, ptr %P, i64 %indvar
store i32 1, ptr %arrayidx, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; Store of null should turn into memset of zero.
define void @test12(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 80000, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[P]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
%arrayidx = getelementptr ptr, ptr %P, i64 %indvar
store ptr null, ptr %arrayidx, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
@G = global i32 5
; This store-of-address loop can be turned into a memset.pattern intrinsic call.
; rdar://9009151
define void @test13_pattern(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.p0.i64(ptr align 4 [[P:%.*]], ptr @G, i64 10000, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[P]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
%arrayidx = getelementptr ptr, ptr %P, i64 %indvar
store ptr @G, ptr %arrayidx, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
define i32 @test14() nounwind {
; CHECK-LABEL: @test14(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[T5:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T5]], 4
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM]]
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[T5]], 5
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM5]]
; CHECK-NEXT: store i32 [[T2]], ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[T5]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 2
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: [[T8:%.*]] = load i32, ptr getelementptr inbounds ([7 x i32], ptr @g_50, i32 0, i64 6), align 4
; CHECK-NEXT: ret i32 [[T8]]
;
entry:
br label %for.body
for.body: ; preds = %for.inc, %for.body.lr.ph
%t5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%add = add nsw i32 %t5, 4
%idxprom = sext i32 %add to i64
%arrayidx = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 %idxprom
%t2 = load i32, ptr %arrayidx, align 4
%add4 = add nsw i32 %t5, 5
%idxprom5 = sext i32 %add4 to i64
%arrayidx6 = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 %idxprom5
store i32 %t2, ptr %arrayidx6, align 4
%inc = add nsw i32 %t5, 1
%cmp = icmp slt i32 %inc, 2
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.inc
%t8 = load i32, ptr getelementptr inbounds ([7 x i32], ptr @g_50, i32 0, i64 6), align 4
ret i32 %t8
}
define void @PR14241(ptr %s, i64 %size) {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; CHECK-LABEL: @PR14241(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, ptr [[S:%.*]], i64 [[END_IDX]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[S]], i64 4
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -8
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 4 [[S]], ptr align 4 [[SCEVGEP]], i64 [[TMP4]], i1 false)
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi ptr [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds i32, ptr [[PHI_PTR]], i64 1
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SRC_PTR]], align 4
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr inbounds i32, ptr [[PHI_PTR]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[NEXT_PTR]], [[END_PTR]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%end.idx = add i64 %size, -1
%end.ptr = getelementptr inbounds i32, ptr %s, i64 %end.idx
br label %while.body
while.body:
%phi.ptr = phi ptr [ %s, %entry ], [ %next.ptr, %while.body ]
%src.ptr = getelementptr inbounds i32, ptr %phi.ptr, i64 1
%val = load i32, ptr %src.ptr, align 4
store i32 %val, ptr %phi.ptr, align 4
%next.ptr = getelementptr inbounds i32, ptr %phi.ptr, i64 1
%cmp = icmp eq ptr %next.ptr, %end.ptr
br i1 %cmp, label %exit, label %while.body
exit:
ret void
}
; Recognize loops with a negative stride.
define void @test15(ptr nocapture %f) {
; CHECK-LABEL: @test15(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[F:%.*]], i8 0, i64 262148, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %f, i64 %indvars.iv
store i32 0, ptr %arrayidx, align 4
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%cmp = icmp sgt i64 %indvars.iv, 0
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup:
ret void
}
; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
; the creation of a memset.
define void @test16(ptr nocapture %f) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 65536
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
%arrayidx1 = getelementptr inbounds i32, ptr %f, i64 65536
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %f, i64 %indvars.iv
store i32 0, ptr %arrayidx, align 4
store i32 1, ptr %arrayidx1, align 4
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%cmp = icmp sgt i64 %indvars.iv, 0
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body
ret void
}
; Handle memcpy-able loops with negative stride.
define noalias ptr @test17(ptr nocapture readonly %a, i32 %c) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[C:%.*]] to i64
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[MUL]])
; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0
; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[C]], -1
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[TMP5]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[C]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SCEVGEP]], ptr align 4 [[SCEVGEP1]], i64 [[TMP7]], i1 false)
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: ret ptr [[CALL]]
;
entry:
%conv = sext i32 %c to i64
%mul = shl nsw i64 %conv, 2
%call = tail call noalias ptr @malloc(i64 %mul)
%tobool.9 = icmp eq i32 %c, 0
br i1 %tobool.9, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
%dec10 = add nsw i32 %dec10.in, -1
%idxprom = sext i32 %dec10 to i64
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %call, i64 %idxprom
store i32 %0, ptr %arrayidx2, align 4
%tobool = icmp eq i32 %dec10, 0
br i1 %tobool, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret ptr %call
}
declare noalias ptr @malloc(i64)
; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
; for (int i = 2047; i >= 0; --i) {
; a[i] = b[i];
; }
; }
define void @test18(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
; CHECK-LABEL: @test18(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A:%.*]], ptr align 4 [[B:%.*]], i64 8192, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 2047, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
store i32 %0, ptr %arrayidx2, align 4
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%cmp = icmp sgt i64 %indvars.iv, 0
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body
ret void
}
; A two-dimensional nested loop with negative stride should be promoted to one big memset.
define void @test19(ptr nocapture %X) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK: for.cond1.preheader:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC4:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 99, [[ENTRY]] ], [ [[DEC5:%.*]], [[FOR_INC4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR]], -100
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 9900
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[X]], i64 [[TMP1]]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_06]], 100
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
; CHECK: for.body3:
; CHECK-NEXT: [[J_05:%.*]] = phi i32 [ 99, [[FOR_COND1_PREHEADER]] ], [ [[DEC:%.*]], [[FOR_BODY3]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_05]], [[MUL]]
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[IDXPROM]]
; CHECK-NEXT: [[DEC]] = add nsw i32 [[J_05]], -1
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[J_05]], 0
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC4]]
; CHECK: for.inc4:
; CHECK-NEXT: [[DEC5]] = add nsw i32 [[I_06]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I_06]], 0
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_END6:%.*]]
; CHECK: for.end6:
; CHECK-NEXT: ret void
;
entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry, %for.inc4
%i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
%mul = mul nsw i32 %i.06, 100
br label %for.body3
for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
%add = add nsw i32 %j.05, %mul
%idxprom = sext i32 %add to i64
%arrayidx = getelementptr inbounds i8, ptr %X, i64 %idxprom
store i8 0, ptr %arrayidx, align 1
%dec = add nsw i32 %j.05, -1
%cmp2 = icmp sgt i32 %j.05, 0
br i1 %cmp2, label %for.body3, label %for.inc4
for.inc4: ; preds = %for.body3
%dec5 = add nsw i32 %i.06, -1
%cmp = icmp sgt i32 %i.06, 0
br i1 %cmp, label %for.cond1.preheader, label %for.end6
for.end6: ; preds = %for.inc4
ret void
}
; Handle loops where the trip count is a narrow integer that needs to be
; extended.
define void @form_memset_narrow_size(ptr %ptr, i32 %size) {
; CHECK-LABEL: @form_memset_narrow_size(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
; CHECK: loop.ph:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[PTR:%.*]], i8 0, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE4]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IDXPROM]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
; CHECK: loop.exit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%cmp1 = icmp sgt i32 %size, 0
br i1 %cmp1, label %loop.ph, label %exit
loop.ph:
br label %loop.body
loop.body:
%storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
%idxprom = sext i32 %storemerge4 to i64
%arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
store i64 0, ptr %arrayidx, align 8
%inc = add nsw i32 %storemerge4, 1
%cmp2 = icmp slt i32 %inc, %size
br i1 %cmp2, label %loop.body, label %loop.exit
loop.exit:
br label %exit
exit:
ret void
}
define void @form_memcpy_narrow_size(ptr noalias %dst, ptr noalias %src, i32 %size) {
; CHECK-LABEL: @form_memcpy_narrow_size(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
; CHECK: loop.ph:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[STOREMERGE4]] to i64
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IDXPROM1]]
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[STOREMERGE4]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IDXPROM2]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
; CHECK: loop.exit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%cmp1 = icmp sgt i32 %size, 0
br i1 %cmp1, label %loop.ph, label %exit
loop.ph:
br label %loop.body
loop.body:
%storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
%idxprom1 = sext i32 %storemerge4 to i64
%arrayidx1 = getelementptr inbounds i64, ptr %src, i64 %idxprom1
%v = load i64, ptr %arrayidx1, align 8
%idxprom2 = sext i32 %storemerge4 to i64
%arrayidx2 = getelementptr inbounds i64, ptr %dst, i64 %idxprom2
store i64 %v, ptr %arrayidx2, align 8
%inc = add nsw i32 %storemerge4, 1
%cmp2 = icmp slt i32 %inc, %size
br i1 %cmp2, label %loop.body, label %loop.exit
loop.exit:
br label %exit
exit:
ret void
}
;; Memmove formation.
define void @PR46179_positive_stride(ptr %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_positive_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SRC]], ptr align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr i8, ptr %Src, i64 %Step
%DestI = getelementptr i8, ptr %Src, i64 %indvar
%V = load i8, ptr %SrcI, align 1
store i8 %V, ptr %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
;; Memmove formation. We expect exactly the same memmove result as in the PR46179_positive_stride output.
define void @loop_with_memcpy_PR46179_positive_stride(ptr %Src, i64 %Size) {
; CHECK-LABEL: @loop_with_memcpy_PR46179_positive_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SRC]], ptr align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr i8, ptr %Src, i64 %Step
%DestI = getelementptr i8, ptr %Src, i64 %indvar
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; Memmove formation.
define void @PR46179_negative_stride(ptr %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_negative_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SCEVGEP]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body: ; preds = %bb.nph, %.for.body
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
%Step = add nsw i64 %indvar, -1
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
%V = load i8, ptr %SrcI, align 1
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
store i8 %V, ptr %DestI, align 1
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %.for.body, %bb.nph
ret void
}
;; Memmove formation. We expect exactly the same memmove result as in the PR46179_negative_stride output.
define void @loop_with_memcpy_PR46179_negative_stride(ptr %Src, i64 %Size) {
; CHECK-LABEL: @loop_with_memcpy_PR46179_negative_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SCEVGEP]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body: ; preds = %bb.nph, %.for.body
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
%Step = add nsw i64 %indvar, -1
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %.for.body, %bb.nph
ret void
}
;; Memmove formation.
define void @loop_with_memcpy_stride16(ptr %Src, i64 %Size) {
; CHECK-LABEL: @loop_with_memcpy_stride16(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 16
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SIZE:%.*]], i64 16)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 16
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SRC]], ptr align 1 [[SCEVGEP]], i64 [[TMP3]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ 0, [[BB_NPH:%.*]] ]
; CHECK-NEXT: [[STEP]] = add nuw nsw i64 [[INDVAR]], 16
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[STEP]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %for.body, %bb.nph
%indvar = phi i64 [ %Step, %for.body ], [ 0, %bb.nph ]
%Step = add nuw nsw i64 %indvar, 16
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 16, i1 false)
%exitcond = icmp slt i64 %Step, %Size
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
;; Do not form memmove from previous load when stride is positive.
define void @do_not_form_memmove1(ptr %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove1(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
; CHECK-NEXT: store i8 [[V]], ptr [[DESTI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, -1
%SrcI = getelementptr i8, ptr %Src, i64 %Step
%DestI = getelementptr i8, ptr %Src, i64 %indvar
%V = load i8, ptr %SrcI, align 1
store i8 %V, ptr %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; Do not form memmove from previous load in memcpy when stride is positive.
define void @do_not_form_memmove2(ptr %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove2(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DESTI]], ptr align 1 [[SRCI]], i64 1, i1 false)
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, -1
%SrcI = getelementptr i8, ptr %Src, i64 %Step
%DestI = getelementptr i8, ptr %Src, i64 %indvar
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
;; Do not form memmove from next load when stride is negative.
define void @do_not_form_memmove3(ptr %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove3(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: store i8 [[V]], ptr [[DESTI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body: ; preds = %bb.nph, %.for.body
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
%V = load i8, ptr %SrcI, align 1
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
store i8 %V, ptr %DestI, align 1
%indvar.next = add nsw i64 %indvar, -1
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %.for.body, %bb.nph
ret void
}
;; Do not form memmove from next load in memcpy when stride is negative.
define void @do_not_form_memmove4(ptr %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove4(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DESTI]], ptr align 1 [[SRCI]], i64 1, i1 false)
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%cmp1 = icmp sgt i64 %Size, 0
br i1 %cmp1, label %for.body, label %for.end
for.body:                                         ; preds = %bb.nph, %for.body
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
%indvar.next = add nsw i64 %indvar, -1
%exitcond = icmp sgt i64 %indvar, 1
br i1 %exitcond, label %for.body, label %for.end
for.end:                                          ; preds = %for.body, %bb.nph
ret void
}
;; Do not form memmove when an underaligned load overlaps the store.
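;; Rough C sketch of the loop below (illustrative only; assumes int *s, and the
;; 2-byte-aligned i32 load is written as a memcpy into a temporary):
;;   int *p = s, *end = s + size - 1;
;;   do {
;;     int v;
;;     memcpy(&v, (char *)p + 2, sizeof v);   // underaligned load overlapping the store
;;     *p++ = v;
;;   } while (p != end);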
define void @do_not_form_memmove5(ptr %s, i64 %size) {
; CHECK-LABEL: @do_not_form_memmove5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, ptr [[S:%.*]], i64 [[END_IDX]]
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi ptr [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr i16, ptr [[PHI_PTR]], i64 1
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SRC_PTR]], align 2
; CHECK-NEXT: store i32 [[VAL]], ptr [[PHI_PTR]], align 4
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr i32, ptr [[PHI_PTR]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[NEXT_PTR]], [[END_PTR]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%end.idx = add i64 %size, -1
%end.ptr = getelementptr inbounds i32, ptr %s, i64 %end.idx
br label %while.body
while.body:
%phi.ptr = phi ptr [ %s, %entry ], [ %next.ptr, %while.body ]
%src.ptr = getelementptr i16, ptr %phi.ptr, i64 1
  ; The underaligned load below overlaps the store.
%val = load i32, ptr %src.ptr, align 2
store i32 %val, ptr %phi.ptr, align 4
%next.ptr = getelementptr i32, ptr %phi.ptr, i64 1
%cmp = icmp eq ptr %next.ptr, %end.ptr
br i1 %cmp, label %exit, label %while.body
exit:
ret void
}
;; Do not form memmove for memcpy with aliasing store.
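;; Rough C equivalent for reference (illustrative only; assumes Size > 0 and
;; that @external may return a pointer aliasing Src):
;;   char *Alias = external(Src);
;;   for (unsigned long i = 0; i < Size; ++i) {
;;     memcpy(&Src[i], &Src[i + 1], 1);
;;     *Alias = 4;                      // potentially aliasing store in the same iteration
;;   }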
define void @do_not_form_memmove6(ptr %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove6(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[BASEALIAS:%.*]] = call ptr @external(ptr [[SRC:%.*]])
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DESTI]], ptr align 1 [[SRCI]], i64 1, i1 false)
; CHECK-NEXT: store i8 4, ptr [[BASEALIAS]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
%BaseAlias = call ptr @external(ptr %Src)
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 1
%SrcI = getelementptr i8, ptr %Src, i64 %Step
%DestI = getelementptr i8, ptr %Src, i64 %indvar
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
store i8 4, ptr %BaseAlias
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}
;; Do not form memmove when load has more than one use.
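;; Rough C equivalent (illustrative only; assumes int *p):
;;   int sum = 0;
;;   for (int i = 15; i >= 1; --i) {
;;     int v = p[i - 1];
;;     p[i] = v;      // the loaded value also feeds the sum,
;;     sum += v;      // so the load has a second use
;;   }
;;   return sum;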
define i32 @do_not_form_memmove7(ptr %p) {
; CHECK-LABEL: @do_not_form_memmove7(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 15, [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUB]] = add nsw i32 [[INDEX]], -1
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INDEX]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[SUM]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[INDEX]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
;
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
for.body: ; preds = %entry, %for.body
%index = phi i32 [ 15, %entry ], [ %sub, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%sub = add nsw i32 %index, -1
%0 = zext i32 %sub to i64
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %0
%1 = load i32, ptr %arrayidx, align 4
%idxprom = zext i32 %index to i64
%arrayidx2 = getelementptr inbounds i32, ptr %p, i64 %idxprom
store i32 %1, ptr %arrayidx2, align 4
%add = add nsw i32 %1, %sum
%cmp = icmp sgt i32 %index, 1
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; Do not form memmove when there's an aliasing operation, even
; if the memcpy source and destination are in the same object.
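;; Rough C equivalent (illustrative only; assumes long is 64 bits, matching the
;; i64 elements and the 8-byte memcpy):
;;   long *p2 = p + 1000;
;;   for (int i = 0; i < 44; ++i) {
;;     p2[i] = 1;                           // store aliases the memcpy source
;;     memcpy(&p[i], &p2[i], sizeof(long));
;;   }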
define void @do_not_form_memmove8(ptr %p) {
; CHECK-LABEL: @do_not_form_memmove8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: loop:
; CHECK-NEXT: [[X4:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X13:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[X5:%.*]] = zext i32 [[X4]] to i64
; CHECK-NEXT: [[X7:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[X5]]
; CHECK-NEXT: store i64 1, ptr [[X7]], align 4
; CHECK-NEXT: [[X11:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[X5]]
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X11]], ptr [[X7]], i64 8, i1 false)
; CHECK-NEXT: [[X13]] = add i32 [[X4]], 1
; CHECK-NEXT: [[X14:%.*]] = icmp eq i32 [[X13]], 44
; CHECK-NEXT: br i1 [[X14]], label [[EXIT:%.*]], label [[LOOP]]
;
entry:
%p2 = getelementptr inbounds i64, ptr %p, i64 1000
br label %loop
exit:
ret void
loop:
%x4 = phi i32 [ 0, %entry ], [ %x13, %loop ]
%x5 = zext i32 %x4 to i64
%x7 = getelementptr inbounds i64, ptr %p2, i64 %x5
store i64 1, ptr %x7, align 4
%x11 = getelementptr inbounds i64, ptr %p, i64 %x5
tail call void @llvm.memcpy.p0.p0.i64(ptr %x11, ptr %x7, i64 8, i1 false)
%x13 = add i32 %x4, 1
%x14 = icmp eq i32 %x13, 44
br i1 %x14, label %exit, label %loop
}
;; Memcpy formation is still preferred over memmove.
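;; Rough C equivalent (illustrative only; assumes Size > 0, with Src and Dest
;; marked noalias/restrict so a plain memcpy is safe):
;;   for (unsigned long i = 0; i < Size; ++i)
;;     Dest[i] = Src[i + 42];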
define void @prefer_memcpy_over_memmove(ptr noalias %Src, ptr noalias %Dest, i64 %Size) {
; CHECK-LABEL: @prefer_memcpy_over_memmove(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 42
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST:%.*]], ptr align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 42
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%Step = add nuw nsw i64 %indvar, 42
%SrcI = getelementptr i8, ptr %Src, i64 %Step
%DestI = getelementptr i8, ptr %Dest, i64 %indvar
%V = load i8, ptr %SrcI, align 1
store i8 %V, ptr %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end:                                          ; preds = %for.body
ret void
}
%class.CMSPULog = type { %struct._opaque_pthread_mutex_t, ptr, i32, i32, i32, i8, i8, i8, [512 x i32] }
%struct._opaque_pthread_mutex_t = type { i64, [56 x i8] }
define noalias ptr @_ZN8CMSPULog9beginImplEja(ptr nocapture writeonly %0) local_unnamed_addr #0 {
; CHECK-LABEL: @_ZN8CMSPULog9beginImplEja(
; CHECK-NEXT: br label [[TMP2:%.*]]
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP4:%.*]], [[TMP2]] ]
; CHECK-NEXT: [[TMP4]] = add nuw nsw i32 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[CLASS_CMSPULOG:%.*]], ptr [[TMP0:%.*]], i64 0, i32 8, i64 [[TMP5]]
; CHECK-NEXT: [[AND:%.*]] = and i64 ptrtoint (ptr @G to i64), 16777215
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[AND]] to i32
; CHECK-NEXT: store i32 [[TRUNC]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP3]], 511
; CHECK-NEXT: br i1 [[TMP7]], label [[TMP2]], label [[TMP8:%.*]]
; CHECK: 8:
; CHECK-NEXT: ret ptr null
;
br label %2
2: ; preds = %1, %2
%3 = phi i32 [ 0, %1 ], [ %4, %2 ]
%4 = add nuw nsw i32 %3, 1
%5 = zext i32 %3 to i64
%6 = getelementptr %class.CMSPULog, ptr %0, i64 0, i32 8, i64 %5
%and = and i64 ptrtoint (ptr @G to i64), 16777215
%trunc = trunc i64 %and to i32
store i32 %trunc, ptr %6, align 4
%7 = icmp ult i32 %3, 511
br i1 %7, label %2, label %8
8: ; preds = %2
ret ptr null
}
; Validate that "memset_pattern" has the proper attributes.
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.