
In order to keep the change as incremental as possible, this only introduces the memset.pattern intrinsic in cases where memset_pattern16 would have been used. Future patches can enable it on targets that don't have the intrinsic, and select it in cases where the libcall isn't directly usable.

As the memset.pattern intrinsic takes the number of times to store the pattern as an argument, unlike memset_pattern16 which takes the number of bytes to write, we no longer try to form an i128 pattern.

Special care is taken for cases where multiple stores in the same loop iteration were combined to form a single pattern. For such cases, we inherit the existing limitation. Loops such as the following are supported:

```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 2;
}
```

But the following doesn't result in a memset.pattern (even though it could, by forming an appropriate pattern):

```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 3;
}
```

Addressing this existing deficiency is left for a follow-up due to a desire not to change too much at once (i.e. to target equivalence to the current codegen).

A command line option is introduced to force the selection of the intrinsic even in cases where it otherwise wouldn't be selected (i.e. in cases where the libcall wouldn't have been selected). This is intended as a transitional option for testing and experimentation, to be removed at a later point.

The only platforms this should impact are those that have the memset_pattern16 libcall (Apple platforms). Testing performed to check for no unexpected codegen changes is described here: https://github.com/llvm/llvm-project/pull/126736#issuecomment-3005097468
1628 lines
70 KiB
LLVM
1628 lines
70 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s

; Loop-idiom recognition tests on a Darwin triple: store loops that should (or
; should not) be rewritten into memset, memcpy or memset.pattern calls.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

target triple = "x86_64-apple-darwin10.0.0"

;.
; CHECK: @G = global i32 5
; CHECK: @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
;.
|
|
; A loop that stores a zero byte to consecutive addresses for %Size iterations
; is replaced by one memset of %Size bytes emitted in bb.nph, ahead of the
; loop; the store itself disappears from the loop body.
define void @test1(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  store i8 0, ptr %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; Make sure memset is formed for stores wider than one byte, and that the
; alignment of the store is preserved: the i16 zero stores become a memset of
; 2 * %Size bytes with align 2.
define void @test1_i16(ptr align 2 %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1_i16(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 2 [[BASE:%.*]], i8 0, i64 [[TMP0]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i16, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i16, ptr %Base, i64 %indvar
  store i16 0, ptr %I.0.014, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; This is a loop that was rotated but where the blocks weren't merged: the
; store sits in for.body and the exit test in for.body.cont. This shouldn't
; perturb us; the memset is still formed.
define void @test1a(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1a(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    br label [[FOR_BODY_CONT]]
; CHECK:       for.body.cont:
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  store i8 0, ptr %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont

for.body.cont:                                    ; preds = %for.body
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
}
|
|
|
|
|
|
; The stored i32 constant 16843009 is 0x01010101, i.e. every byte equals 1, so
; the loop is recognized as a memset of byte value 1 over 4 * %Size bytes.
; The loop is guarded by a %Size == 0 test; the memset is placed in a new
; for.body.preheader block so it only runs when the loop would.
define void @test2(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP10:%.*]] = icmp eq i64 [[SIZE:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP10]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE]], 2
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[BASE:%.*]], i8 1, i64 [[TMP0]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[I_011]]
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_011]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, ptr %Base, i64 %i.011
  store i32 16843009, ptr %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
|
|
|
; This is a case where there is an extra may-aliased store in the loop (the
; i8 store through %MayAlias), so we can't promote the i32 store to a memset.
; Both stores must remain in the loop.
define void @test3(ptr %Base, i64 %Size, ptr %MayAlias) nounwind ssp {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[I_011]]
; CHECK-NEXT:    store i32 16843009, ptr [[ADD_PTR_I]], align 4
; CHECK-NEXT:    store i8 42, ptr [[MAYALIAS:%.*]], align 1
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_011]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, ptr %Base, i64 %i.011
  store i32 16843009, ptr %add.ptr.i, align 4

  store i8 42, ptr %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; Make sure the first store in the loop is turned into a memset even though a
; second, loop-invariant store is present; that second store stays in the loop.
define void @test4(ptr %Base) nounwind ssp {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[BASE100:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 1000
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[BASE]], i8 0, i64 100, i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    store i8 42, ptr [[BASE100]], align 1
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 100
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  ; Despite its name, %Base100 points 1000 bytes past %Base.
  %Base100 = getelementptr i8, ptr %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  store i8 0, ptr %I.0.014, align 1

  ;; This store is at %Base + 1000, beyond the 100 bytes covered by the
  ;; memset, so it should be safe to promote the first store.
  store i8 42, ptr %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; This can't be promoted: the stored value (%V, a truncation of the induction
; variable) is loop-variant, so the loop is left untouched.
define void @test5(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[INDVAR]]
; CHECK-NEXT:    [[V:%.*]] = trunc i64 [[INDVAR]] to i8
; CHECK-NEXT:    store i8 [[V]], ptr [[I_0_014]], align 1
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar

  %V = trunc i64 %indvar to i8
  store i8 %V, ptr %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
|
|
;; memcpy formation: a byte-by-byte copy from one alloca (%Base) to another
;; (%Dest) becomes a single memcpy of %Size bytes. The store is removed from
;; the loop; the load is still expected in the output.
define void @test6(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[I_0_014]], align 1
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  %V = load i8, ptr %I.0.014, align 1
  store i8 %V, ptr %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
;; memcpy formation, check alignment: the store is align 4 and the load is
;; align 1, so the memcpy must carry align 4 on the destination and align 1 on
;; the source.
define void @test6_dest_align(ptr noalias align 1 %Base, ptr noalias align 4 %Dest, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6_dest_align(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST:%.*]], ptr align 1 [[BASE:%.*]], i64 [[TMP0]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[I_0_014]], align 1
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  %V = load i32, ptr %I.0.014, align 1
  store i32 %V, ptr %DestI, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
;; memcpy formation, check alignment: the load is align 4 and the store is
;; align 1, so the memcpy must carry align 4 on the source and align 1 on the
;; destination.
define void @test6_src_align(ptr noalias align 4 %Base, ptr noalias align 1 %Dest, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6_src_align(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST:%.*]], ptr align 4 [[BASE:%.*]], i64 [[TMP0]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[I_0_014]], align 4
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  %V = load i32, ptr %I.0.014, align 4
  store i32 %V, ptr %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
|
|
; This is a loop that was rotated but where the blocks weren't merged. Here
; for.body holds only the phi, while the store and the exit test live in
; for.body.cont. This shouldn't perturb us; the memset is still formed.
define void @test7(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test7(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY_CONT]]
; CHECK:       for.body.cont:
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont

for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  store i8 0, ptr %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
}
|
|
|
|
; This loop should not be transformed: it only executes one iteration (the
; exit test compares the incremented induction variable against 1).
define void @test8(ptr %Ptr, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test8(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[PI:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 [[INDVAR]]
; CHECK-NEXT:    store i64 0, ptr [[PI]], align 8
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64, ptr %Ptr, i64 %indvar
  store i64 0, ptr %PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
declare ptr @external(ptr)
|
|
|
|
;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop: %BaseAlias is returned by @external(%Base) and is
;; stored to on every iteration.
define void @test9(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test9(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
; CHECK-NEXT:    [[BASEALIAS:%.*]] = call ptr @external(ptr [[BASE]])
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[I_0_014]], align 1
; CHECK-NEXT:    store i8 [[V]], ptr [[DESTI]], align 1
; CHECK-NEXT:    store i8 4, ptr [[BASEALIAS]], align 1
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  %BaseAlias = call ptr @external(ptr %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  %V = load i8, ptr %I.0.014, align 1
  store i8 %V, ptr %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, ptr %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; Two dimensional nested loop should be promoted to one big memset: the
; 100 x 100 byte stores to X[i * 100 + j] become a single 10000-byte memset in
; the entry block.
define void @test10(ptr %X) nounwind ssp {
; CHECK-LABEL: @test10(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
; CHECK-NEXT:    br label [[BB_NPH:%.*]]
; CHECK:       bb.nph:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC10:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I_04:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC12:%.*]], [[FOR_INC10]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = mul nuw nsw i64 [[INDVAR]], 100
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT:    br label [[FOR_BODY5:%.*]]
; CHECK:       for.body5:
; CHECK-NEXT:    [[J_02:%.*]] = phi i32 [ 0, [[BB_NPH]] ], [ [[INC:%.*]], [[FOR_BODY5]] ]
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_04]], 100
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[J_02]], [[MUL]]
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[J_02]], 1
; CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[INC]], 100
; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_INC10]], label [[FOR_BODY5]]
; CHECK:       for.inc10:
; CHECK-NEXT:    [[INC12]] = add nsw i32 [[I_04]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC12]], 100
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END13:%.*]], label [[BB_NPH]]
; CHECK:       for.end13:
; CHECK-NEXT:    ret void
;
entry:
  br label %bb.nph

bb.nph:                                           ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:                                        ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, ptr %X, i64 %idxprom
  store i8 0, ptr %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:                                        ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:                                        ; preds = %for.inc10
  ret void
}
|
|
|
|
; On darwin10 (which is the triple in this .ll file) this loop of i32 stores
; is turned into a single llvm.experimental.memset.pattern call that takes the
; i32 value 1 as the pattern and 10000 as the repeat count (a count of
; elements, not bytes).
; rdar://9009151
define void @test11_pattern(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[P:%.*]], i32 1, i64 10000, i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, ptr %P, i64 %indvar
  store i32 1, ptr %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; Store of null should turn into memset of zero. Pointers are 64 bits wide in
; this file's datalayout, so 10000 null stores cover 80000 bytes; the store's
; align 4 is carried over to the memset.
define void @test12(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test12(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 80000, i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[P]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr ptr, ptr %P, i64 %indvar
  store ptr null, ptr %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
@G = global i32 5
|
|
|
|
; This store-of-address loop is turned into an
; llvm.experimental.memset.pattern call whose pattern operand is the pointer
; @G and whose repeat count is 10000.
; rdar://9009151
define void @test13_pattern(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.p0.i64(ptr align 4 [[P:%.*]], ptr @G, i64 10000, i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[P]], i64 [[INDVAR]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr ptr, ptr %P, i64 %indvar
  store ptr @G, ptr %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
|
|
|
|
; PR9815 - This is a partial overlap case that cannot be safely transformed
|
|
; into a memcpy.
|
|
; Seven-element array read and written by @test14; only element 4 starts
; out nonzero.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
|
|
|
|
define i32 @test14() nounwind {
|
|
; CHECK-LABEL: @test14(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[T5:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T5]], 4
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[T5]], 5
|
|
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM5]]
|
|
; CHECK-NEXT: store i32 [[T2]], ptr [[ARRAYIDX6]], align 4
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[T5]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 2
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: [[T8:%.*]] = load i32, ptr getelementptr inbounds ([7 x i32], ptr @g_50, i32 0, i64 6), align 4
|
|
; CHECK-NEXT: ret i32 [[T8]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%t5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
|
%add = add nsw i32 %t5, 4
|
|
%idxprom = sext i32 %add to i64
|
|
%arrayidx = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 %idxprom
|
|
; Reads g_50[t5 + 4] ...
%t2 = load i32, ptr %arrayidx, align 4
|
|
%add4 = add nsw i32 %t5, 5
|
|
%idxprom5 = sext i32 %add4 to i64
|
|
%arrayidx6 = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 %idxprom5
|
|
; ... and writes g_50[t5 + 5], the element the next iteration loads. The
; expected output therefore keeps the load/store loop as written.
store i32 %t2, ptr %arrayidx6, align 4
|
|
%inc = add nsw i32 %t5, 1
|
|
%cmp = icmp slt i32 %inc, 2
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body
|
|
%t8 = load i32, ptr getelementptr inbounds ([7 x i32], ptr @g_50, i32 0, i64 6), align 4
|
|
ret i32 %t8
|
|
|
|
}
|
|
|
|
define void @PR14241(ptr %s, i64 %size) {
|
|
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
|
|
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
|
|
; instead of a memmove. The expected output below forms the memmove, so this
|
|
; also catches regressions of that transform.
|
|
;
|
|
; CHECK-LABEL: @PR14241(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
|
|
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, ptr [[S:%.*]], i64 [[END_IDX]]
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[S]], i64 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE]], 2
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -8
|
|
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
|
|
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 4 [[S]], ptr align 4 [[SCEVGEP]], i64 [[TMP4]], i1 false)
|
|
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; CHECK: while.body:
|
|
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi ptr [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
|
|
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds i32, ptr [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SRC_PTR]], align 4
|
|
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr inbounds i32, ptr [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[NEXT_PTR]], [[END_PTR]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
|
|
entry:
|
|
%end.idx = add i64 %size, -1
|
|
%end.ptr = getelementptr inbounds i32, ptr %s, i64 %end.idx
|
|
br label %while.body
|
|
|
|
while.body:
|
|
%phi.ptr = phi ptr [ %s, %entry ], [ %next.ptr, %while.body ]
|
|
; Each iteration copies the i32 one element ahead of %phi.ptr into
; %phi.ptr, so source and destination ranges overlap within %s.
%src.ptr = getelementptr inbounds i32, ptr %phi.ptr, i64 1
|
|
%val = load i32, ptr %src.ptr, align 4
|
|
store i32 %val, ptr %phi.ptr, align 4
|
|
%next.ptr = getelementptr inbounds i32, ptr %phi.ptr, i64 1
|
|
%cmp = icmp eq ptr %next.ptr, %end.ptr
|
|
br i1 %cmp, label %exit, label %while.body
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Recognize loops with a negative stride.
|
|
define void @test15(ptr nocapture %f) {
|
|
; CHECK-LABEL: @test15(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[F:%.*]], i8 0, i64 262148, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
; The index counts down from 65536 to 0 inclusive: 65537 i32 stores, i.e.
; 65537 * 4 = 262148 bytes, the length of the expected memset.
%indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, ptr %f, i64 %indvars.iv
|
|
store i32 0, ptr %arrayidx, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
|
|
; the creation of a memset.
|
|
define void @test16(ptr nocapture %f) {
|
|
; CHECK-LABEL: @test16(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 65536
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Loop-invariant address of f[65536], the first element the loop's zero
; store touches (the index starts at 65536).
%arrayidx1 = getelementptr inbounds i32, ptr %f, i64 65536
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, ptr %f, i64 %indvars.iv
|
|
store i32 0, ptr %arrayidx, align 4
|
|
; Second store in the same iteration, to f[65536]; it overlaps the range
; written by the store above, and the expected output keeps both stores.
store i32 1, ptr %arrayidx1, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Handle memcpy-able loops with negative stride.
|
|
define noalias ptr @test17(ptr nocapture readonly %a, i32 %c) {
|
|
; CHECK-LABEL: @test17(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[C:%.*]] to i64
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
|
|
; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[MUL]])
|
|
; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0
|
|
; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
|
|
; CHECK: while.body.preheader:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[C]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP0]] to i64
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
|
|
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[C]] to i64
|
|
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SCEVGEP]], ptr align 4 [[SCEVGEP1]], i64 [[TMP7]], i1 false)
|
|
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; CHECK: while.body:
|
|
; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0
|
|
; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
|
|
; CHECK: while.end.loopexit:
|
|
; CHECK-NEXT: br label [[WHILE_END]]
|
|
; CHECK: while.end:
|
|
; CHECK-NEXT: ret ptr [[CALL]]
|
|
;
|
|
entry:
|
|
%conv = sext i32 %c to i64
|
|
%mul = shl nsw i64 %conv, 2
|
|
; Destination buffer of %c * 4 bytes; the result is declared noalias.
%call = tail call noalias ptr @malloc(i64 %mul)
|
|
; Skip the loop entirely when %c is zero.
%tobool.9 = icmp eq i32 %c, 0
|
|
br i1 %tobool.9, label %while.end, label %while.body.preheader
|
|
|
|
while.body.preheader: ; preds = %entry
|
|
br label %while.body
|
|
|
|
while.body: ; preds = %while.body.preheader, %while.body
|
|
; The index is decremented before use, so it runs from %c - 1 down to 0.
%dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
|
|
%dec10 = add nsw i32 %dec10.in, -1
|
|
%idxprom = sext i32 %dec10 to i64
|
|
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
|
|
; Element-wise copy a[idx] -> call[idx]; the expected output replaces the
; store with one memcpy in the preheader.
%0 = load i32, ptr %arrayidx, align 4
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %call, i64 %idxprom
|
|
store i32 %0, ptr %arrayidx2, align 4
|
|
%tobool = icmp eq i32 %dec10, 0
|
|
br i1 %tobool, label %while.end.loopexit, label %while.body
|
|
|
|
while.end.loopexit: ; preds = %while.body
|
|
br label %while.end
|
|
|
|
while.end: ; preds = %while.end.loopexit, %entry
|
|
ret ptr %call
|
|
}
|
|
|
|
; Allocator called by @test17 to obtain the copy destination.
declare noalias ptr @malloc(i64)
|
|
|
|
; Handle memcpy-able loops with negative stride.
|
|
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
|
|
; for (int i = 2047; i >= 0; --i) {
|
|
; a[i] = b[i];
|
|
; }
|
|
; }
|
|
define void @test18(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
|
|
; CHECK-LABEL: @test18(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A:%.*]], ptr align 4 [[B:%.*]], i64 8192, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 2047, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
; The index counts down from 2047 to 0 inclusive: 2048 i32 copies, i.e.
; 2048 * 4 = 8192 bytes, the length of the expected memcpy.
%indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
|
|
%0 = load i32, ptr %arrayidx, align 4
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
|
|
store i32 %0, ptr %arrayidx2, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; A two-dimensional nested loop with negative stride should be promoted to one big memset.
|
|
define void @test19(ptr nocapture %X) {
|
|
; CHECK-LABEL: @test19(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
|
|
; CHECK: for.cond1.preheader:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC4:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 99, [[ENTRY]] ], [ [[DEC5:%.*]], [[FOR_INC4]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR]], -100
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 9900
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[X]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_06]], 100
|
|
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
|
|
; CHECK: for.body3:
|
|
; CHECK-NEXT: [[J_05:%.*]] = phi i32 [ 99, [[FOR_COND1_PREHEADER]] ], [ [[DEC:%.*]], [[FOR_BODY3]] ]
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_05]], [[MUL]]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[DEC]] = add nsw i32 [[J_05]], -1
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[J_05]], 0
|
|
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC4]]
|
|
; CHECK: for.inc4:
|
|
; CHECK-NEXT: [[DEC5]] = add nsw i32 [[I_06]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I_06]], 0
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_END6:%.*]]
|
|
; CHECK: for.end6:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.cond1.preheader
|
|
|
|
for.cond1.preheader: ; preds = %entry, %for.inc4
|
|
; Outer index i counts down from 99 to 0 inclusive (100 rows).
%i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
|
|
%mul = mul nsw i32 %i.06, 100
|
|
br label %for.body3
|
|
|
|
for.body3: ; preds = %for.cond1.preheader, %for.body3
|
|
; Inner index j also counts down from 99 to 0, so X[i * 100 + j] covers
; 100 * 100 = 10000 bytes, the length of the expected memset.
%j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
|
|
%add = add nsw i32 %j.05, %mul
|
|
%idxprom = sext i32 %add to i64
|
|
%arrayidx = getelementptr inbounds i8, ptr %X, i64 %idxprom
|
|
store i8 0, ptr %arrayidx, align 1
|
|
%dec = add nsw i32 %j.05, -1
|
|
%cmp2 = icmp sgt i32 %j.05, 0
|
|
br i1 %cmp2, label %for.body3, label %for.inc4
|
|
|
|
for.inc4: ; preds = %for.body3
|
|
%dec5 = add nsw i32 %i.06, -1
|
|
%cmp = icmp sgt i32 %i.06, 0
|
|
br i1 %cmp, label %for.cond1.preheader, label %for.end6
|
|
|
|
for.end6: ; preds = %for.inc4
|
|
ret void
|
|
}
|
|
|
|
; Handle loops where the trip count is a narrow integer that needs to be
|
|
; extended.
|
|
define void @form_memset_narrow_size(ptr %ptr, i32 %size) {
|
|
; CHECK-LABEL: @form_memset_narrow_size(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: loop.ph:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
|
|
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[PTR:%.*]], i8 0, i64 [[TMP1]], i1 false)
|
|
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
|
|
; CHECK: loop.body:
|
|
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: br label [[EXIT]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; The loop is only entered when the i32 bound is positive.
%cmp1 = icmp sgt i32 %size, 0
|
|
br i1 %cmp1, label %loop.ph, label %exit
|
|
|
|
loop.ph: ; preds = %entry
|
|
br label %loop.body
|
|
|
|
loop.body: ; preds = %loop.ph, %loop.body
|
|
; The induction variable is i32 while the address arithmetic is i64. The
; expected output zero-extends %size and shifts it left by 3 (8 bytes per
; i64 element) to form the memset length.
%storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
|
|
%idxprom = sext i32 %storemerge4 to i64
|
|
%arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
|
|
store i64 0, ptr %arrayidx, align 8
|
|
%inc = add nsw i32 %storemerge4, 1
|
|
%cmp2 = icmp slt i32 %inc, %size
|
|
br i1 %cmp2, label %loop.body, label %loop.exit
|
|
|
|
loop.exit: ; preds = %loop.body
|
|
br label %exit
|
|
|
|
exit: ; preds = %loop.exit, %entry
|
|
ret void
|
|
}
|
|
|
|
; Handle memcpy-able loops where the trip count is a narrow (i32) integer
; that needs to be extended to form the i64 length.
define void @form_memcpy_narrow_size(ptr noalias %dst, ptr noalias %src, i32 %size) {
|
|
; CHECK-LABEL: @form_memcpy_narrow_size(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: loop.ph:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 [[TMP1]], i1 false)
|
|
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
|
|
; CHECK: loop.body:
|
|
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[STOREMERGE4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
|
|
; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[STOREMERGE4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IDXPROM2]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: br label [[EXIT]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; The loop is only entered when the i32 bound is positive.
%cmp1 = icmp sgt i32 %size, 0
|
|
br i1 %cmp1, label %loop.ph, label %exit
|
|
|
|
loop.ph: ; preds = %entry
|
|
br label %loop.body
|
|
|
|
loop.body: ; preds = %loop.ph, %loop.body
|
|
%storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
|
|
%idxprom1 = sext i32 %storemerge4 to i64
|
|
%arrayidx1 = getelementptr inbounds i64, ptr %src, i64 %idxprom1
|
|
; Copies src[i] to dst[i] as i64; both pointers are noalias arguments. The
; expected output emits one memcpy of zext(%size) << 3 bytes in loop.ph.
%v = load i64, ptr %arrayidx1, align 8
|
|
%idxprom2 = sext i32 %storemerge4 to i64
|
|
%arrayidx2 = getelementptr inbounds i64, ptr %dst, i64 %idxprom2
|
|
store i64 %v, ptr %arrayidx2, align 8
|
|
%inc = add nsw i32 %storemerge4, 1
|
|
%cmp2 = icmp slt i32 %inc, %size
|
|
br i1 %cmp2, label %loop.body, label %loop.exit
|
|
|
|
loop.exit: ; preds = %loop.body
|
|
br label %exit
|
|
|
|
exit: ; preds = %loop.exit, %entry
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation.
|
|
define void @PR46179_positive_stride(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @PR46179_positive_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SRC]], ptr align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
; With an increasing index, each iteration moves Src[i + 1] into Src[i],
; i.e. the load is one byte ahead of the store in the same buffer.
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr i8, ptr %Src, i64 %indvar
|
|
%V = load i8, ptr %SrcI, align 1
|
|
store i8 %V, ptr %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; memcpy intrinsic used directly inside the loop bodies of the tests below.
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
|
|
|
|
;; Memmove formation. We expect exactly the same memmove result as in the PR46179_positive_stride output.
|
|
define void @loop_with_memcpy_PR46179_positive_stride(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @loop_with_memcpy_PR46179_positive_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SRC]], ptr align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr i8, ptr %Src, i64 %indvar
|
|
; One-byte memcpy from Src[i + 1] to Src[i]: the per-iteration copy is
; expressed as an intrinsic call instead of a load/store pair.
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation.
|
|
define void @PR46179_negative_stride(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @PR46179_negative_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SCEVGEP]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
; The index counts down from %Size; each iteration moves Src[i - 1] into
; Src[i], i.e. the load is one byte behind the store in the same buffer.
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
|
|
%Step = add nsw i64 %indvar, -1
|
|
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
|
|
%V = load i8, ptr %SrcI, align 1
|
|
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
|
|
store i8 %V, ptr %DestI, align 1
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation. We expect exactly the same memmove result as in the PR46179_negative_stride output.
|
|
define void @loop_with_memcpy_PR46179_negative_stride(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @loop_with_memcpy_PR46179_negative_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SCEVGEP]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
|
|
%Step = add nsw i64 %indvar, -1
|
|
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
|
|
; One-byte memcpy from Src[i - 1] to Src[i] with a decreasing index: the
; per-iteration copy is an intrinsic call instead of a load/store pair.
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation.
|
|
define void @loop_with_memcpy_stride16(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @loop_with_memcpy_stride16(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 16
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SIZE:%.*]], i64 16)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 16
|
|
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[SRC]], ptr align 1 [[SCEVGEP]], i64 [[TMP3]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ 0, [[BB_NPH:%.*]] ]
|
|
; CHECK-NEXT: [[STEP]] = add nuw nsw i64 [[INDVAR]], 16
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[STEP]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %bb.nph
|
|
; The index advances by 16 per iteration, matching the 16-byte size of the
; memcpy below, so consecutive copies tile the buffer without gaps.
%indvar = phi i64 [ %Step, %for.body ], [ 0, %bb.nph ]
|
|
%Step = add nuw nsw i64 %indvar, 16
|
|
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
|
|
; Copies the 16 bytes at Src + i + 16 down to Src + i.
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 16, i1 false)
|
|
%exitcond = icmp slt i64 %Step, %Size
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from previous load when stride is positive.
|
|
define void @do_not_form_memmove1(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove1(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
|
|
; CHECK-NEXT: store i8 [[V]], ptr [[DESTI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
|
|
; With an increasing index, each iteration loads Src[i - 1], the byte the
; previous iteration just stored, so values propagate forward through the
; buffer. The expected output keeps the load/store loop unchanged.
%Step = add nuw nsw i64 %indvar, -1
|
|
%SrcI = getelementptr i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr i8, ptr %Src, i64 %indvar
|
|
%V = load i8, ptr %SrcI, align 1
|
|
store i8 %V, ptr %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from previous load in memcpy when stride is positive.
|
|
define void @do_not_form_memmove2(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove2(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DESTI]], ptr align 1 [[SRCI]], i64 1, i1 false)
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, -1
|
|
%SrcI = getelementptr i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr i8, ptr %Src, i64 %indvar
|
|
; One-byte memcpy from Src[i - 1] to Src[i] with an increasing index: the
; source byte is the one written by the previous iteration. The expected
; output keeps this per-iteration call unchanged.
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from next load when stride is negative.
|
|
define void @do_not_form_memmove3(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove3(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: store i8 [[V]], ptr [[DESTI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
; The index counts down from %Size; each iteration loads Src[i + 1], the
; byte the previous iteration just stored. The expected output keeps the
; load/store loop unchanged.
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
|
|
%V = load i8, ptr %SrcI, align 1
|
|
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
|
|
store i8 %V, ptr %DestI, align 1
|
|
%indvar.next = add nsw i64 %indvar, -1
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from next load in memcpy when stride is negative.
;; The loop counts %indvar down from %Size (it runs while %indvar > 1, signed)
;; and copies Src[indvar + 1] to Src[indvar] with a 1-byte llvm.memcpy. Every
;; copy after the first therefore reads the byte that the previous iteration
;; just wrote. The expected output keeps the 1-byte memcpy inside the loop.
|
|
define void @do_not_form_memmove4(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove4(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DESTI]], ptr align 1 [[SRCI]], i64 1, i1 false)
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
; Skip the loop entirely when %Size <= 0.
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
|
|
; Source index is one byte above the destination index.
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
|
|
; Negative stride: the induction variable decreases by one per iteration.
%indvar.next = add nsw i64 %indvar, -1
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove when underaligned load is overlapped with store.
;; The pointer induction variable %phi.ptr advances by one i32 (4 bytes) per
;; iteration until it equals %s + (%size - 1) i32 elements. Each iteration
;; loads an i32 from %phi.ptr + 2 bytes and stores it to %phi.ptr. The
;; expected output leaves the loop unchanged.
|
|
define void @do_not_form_memmove5(ptr %s, i64 %size) {
|
|
; CHECK-LABEL: @do_not_form_memmove5(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
|
|
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, ptr [[S:%.*]], i64 [[END_IDX]]
|
|
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; CHECK: while.body:
|
|
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi ptr [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
|
|
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr i16, ptr [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SRC_PTR]], align 2
|
|
; CHECK-NEXT: store i32 [[VAL]], ptr [[PHI_PTR]], align 4
|
|
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr i32, ptr [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[NEXT_PTR]], [[END_PTR]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%end.idx = add i64 %size, -1
|
|
%end.ptr = getelementptr inbounds i32, ptr %s, i64 %end.idx
|
|
br label %while.body
|
|
|
|
while.body:
|
|
%phi.ptr = phi ptr [ %s, %entry ], [ %next.ptr, %while.body ]
|
|
; One i16 element past %phi.ptr, i.e. 2 bytes further on.
%src.ptr = getelementptr i16, ptr %phi.ptr, i64 1
|
|
; The load below is only 2-byte aligned and covers bytes [2, 6) relative to
; %phi.ptr, so it overlaps the 4-byte store to bytes [0, 4) that follows it.
|
|
%val = load i32, ptr %src.ptr, align 2
|
|
store i32 %val, ptr %phi.ptr, align 4
|
|
%next.ptr = getelementptr i32, ptr %phi.ptr, i64 1
|
|
%cmp = icmp eq ptr %next.ptr, %end.ptr
|
|
br i1 %cmp, label %exit, label %while.body
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove for memcpy with aliasing store.
;; Each iteration copies Src[indvar + 1] to Src[indvar] with a 1-byte
;; llvm.memcpy and then stores the constant 4 through %BaseAlias, a pointer
;; returned by @external(%Src). The expected output leaves the loop unchanged.
;; NOTE(review): @external is declared outside this part of the file;
;; presumably its result may alias %Src - confirm.
|
|
define void @do_not_form_memmove6(ptr %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove6(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASEALIAS:%.*]] = call ptr @external(ptr [[SRC:%.*]])
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DESTI]], ptr align 1 [[SRCI]], i64 1, i1 false)
|
|
; CHECK-NEXT: store i8 4, ptr [[BASEALIAS]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%BaseAlias = call ptr @external(ptr %Src)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr i8, ptr %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %DestI, ptr align 1 %SrcI, i64 1, i1 false)
|
|
; Extra store in the loop body, through the pointer obtained from @external.
store i8 4, ptr %BaseAlias
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove when load has more than one use.
;; %index starts at 15 and decreases by one while %index > 1 (signed). Each
;; iteration loads p[index - 1], stores it to p[index], and also adds the
;; loaded value to the running %sum, which is returned after the loop. The
;; expected output leaves the loop unchanged.
|
|
define i32 @do_not_form_memmove7(ptr %p) {
|
|
; CHECK-LABEL: @do_not_form_memmove7(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 15, [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUB]] = add nsw i32 [[INDEX]], -1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[TMP0]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INDEX]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[SUM]]
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[INDEX]], 1
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
%add.lcssa = phi i32 [ %add, %for.body ]
|
|
ret i32 %add.lcssa
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%index = phi i32 [ 15, %entry ], [ %sub, %for.body ]
|
|
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
|
%sub = add nsw i32 %index, -1
|
|
%0 = zext i32 %sub to i64
|
|
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %0
|
|
; %1 is used twice below: by the store and by the %add accumulation.
%1 = load i32, ptr %arrayidx, align 4
|
|
%idxprom = zext i32 %index to i64
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %p, i64 %idxprom
|
|
store i32 %1, ptr %arrayidx2, align 4
|
|
%add = add nsw i32 %1, %sum
|
|
%cmp = icmp sgt i32 %index, 1
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
}
|
|
|
|
; Do not form memmove when there's an aliasing operation, even
|
|
; if the memcpy source and destination are in the same object.
; %p2 points 1000 i64 elements past %p. Each of the 44 iterations first
; stores the i64 constant 1 to p2[i] and then copies those 8 bytes to p[i]
; with llvm.memcpy, so the store writes the memcpy's source location. The
; expected output leaves the loop unchanged.
|
|
define void @do_not_form_memmove8(ptr %p) {
|
|
; CHECK-LABEL: @do_not_form_memmove8(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1000
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[X4:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X13:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[X5:%.*]] = zext i32 [[X4]] to i64
|
|
; CHECK-NEXT: [[X7:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[X5]]
|
|
; CHECK-NEXT: store i64 1, ptr [[X7]], align 4
|
|
; CHECK-NEXT: [[X11:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[X5]]
|
|
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X11]], ptr [[X7]], i64 8, i1 false)
|
|
; CHECK-NEXT: [[X13]] = add i32 [[X4]], 1
|
|
; CHECK-NEXT: [[X14:%.*]] = icmp eq i32 [[X13]], 44
|
|
; CHECK-NEXT: br i1 [[X14]], label [[EXIT:%.*]], label [[LOOP]]
|
|
;
|
|
entry:
|
|
%p2 = getelementptr inbounds i64, ptr %p, i64 1000
|
|
br label %loop
|
|
|
|
exit:
|
|
ret void
|
|
|
|
loop:
|
|
%x4 = phi i32 [ 0, %entry ], [ %x13, %loop ]
|
|
%x5 = zext i32 %x4 to i64
|
|
%x7 = getelementptr inbounds i64, ptr %p2, i64 %x5
|
|
; This store writes the location the memcpy below reads from.
store i64 1, ptr %x7, align 4
|
|
%x11 = getelementptr inbounds i64, ptr %p, i64 %x5
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr %x11, ptr %x7, i64 8, i1 false)
|
|
%x13 = add i32 %x4, 1
|
|
; Exit once the incremented counter reaches 44.
%x14 = icmp eq i32 %x13, 44
|
|
br i1 %x14, label %exit, label %loop
|
|
}
|
|
|
|
;; Memcpy formation is still preferred over memmove.
;; %Src and %Dest are both marked noalias. The loop copies Src[indvar + 42]
;; to Dest[indvar] one byte at a time for %Size iterations. The expected
;; output has a single llvm.memcpy of %Size bytes from %Src + 42 to %Dest in
;; bb.nph; the store is gone from the loop while the load remains.
|
|
define void @prefer_memcpy_over_memmove(ptr noalias %Src, ptr noalias %Dest, i64 %Size) {
|
|
; CHECK-LABEL: @prefer_memcpy_over_memmove(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 42
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST:%.*]], ptr align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 42
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[SRCI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
; The source index runs 42 bytes ahead of the destination index.
%Step = add nuw nsw i64 %indvar, 42
|
|
%SrcI = getelementptr i8, ptr %Src, i64 %Step
|
|
%DestI = getelementptr i8, ptr %Dest, i64 %indvar
|
|
%V = load i8, ptr %SrcI, align 1
|
|
store i8 %V, ptr %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Named struct types used by @_ZN8CMSPULog9beginImplEja. Field index 8 of
; %class.CMSPULog (the last field) is the [512 x i32] array that function's
; getelementptr indexes.
%class.CMSPULog = type { %struct._opaque_pthread_mutex_t, ptr, i32, i32, i32, i8, i8, i8, [512 x i32] }
|
|
%struct._opaque_pthread_mutex_t = type { i64, [56 x i8] }
|
|
|
|
; Writes the same i32 value into elements 0..511 of field 8 of the
; %class.CMSPULog object pointed to by %0, then returns null. The value is
; the address of @G masked with 16777215 and truncated to i32. The expected
; output leaves the loop, including the store, unchanged.
; NOTE(review): @G and attribute group #0 are defined outside this part of the
; file. Presumably the loop is left alone because the stored value is built
; from a global's address rather than being a plain constant - confirm.
define noalias ptr @_ZN8CMSPULog9beginImplEja(ptr nocapture writeonly %0) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: @_ZN8CMSPULog9beginImplEja(
|
|
; CHECK-NEXT: br label [[TMP2:%.*]]
|
|
; CHECK: 2:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP4:%.*]], [[TMP2]] ]
|
|
; CHECK-NEXT: [[TMP4]] = add nuw nsw i32 [[TMP3]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[CLASS_CMSPULOG:%.*]], ptr [[TMP0:%.*]], i64 0, i32 8, i64 [[TMP5]]
|
|
; CHECK-NEXT: [[AND:%.*]] = and i64 ptrtoint (ptr @G to i64), 16777215
|
|
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[AND]] to i32
|
|
; CHECK-NEXT: store i32 [[TRUNC]], ptr [[TMP6]], align 4
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP3]], 511
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[TMP2]], label [[TMP8:%.*]]
|
|
; CHECK: 8:
|
|
; CHECK-NEXT: ret ptr null
|
|
;
|
|
br label %2
|
|
|
|
2: ; preds = %1, %2
|
|
%3 = phi i32 [ 0, %1 ], [ %4, %2 ]
|
|
%4 = add nuw nsw i32 %3, 1
|
|
%5 = zext i32 %3 to i64
|
|
; Address of element %5 in the [512 x i32] array (field index 8).
%6 = getelementptr %class.CMSPULog, ptr %0, i64 0, i32 8, i64 %5
|
|
; The stored value does not depend on the loop counter.
%and = and i64 ptrtoint (ptr @G to i64), 16777215
|
|
%trunc = trunc i64 %and to i32
|
|
store i32 %trunc, ptr %6, align 4
|
|
; Continue while the current counter is below 511; the last index written is 511.
%7 = icmp ult i32 %3, 511
|
|
br i1 %7, label %2, label %8
|
|
|
|
8: ; preds = %2
|
|
ret ptr null
|
|
}
|
|
|
|
; Validate that "memset_pattern" has the proper attributes.
|
|
;.
|
|
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
|
|
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
|
|
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
|
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
|
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
;.
|