
In order to keep the change as incremental as possible, this only introduces the memset.pattern intrinsic in cases where memset_pattern16 would have been used. Future patches can enable it on targets that don't have the memset_pattern16 libcall, and select it in cases where the libcall isn't directly usable. As the memset.pattern intrinsic takes the number of times to store the pattern as an argument, unlike memset_pattern16, which takes the number of bytes to write, we no longer try to form an i128 pattern. Special care is taken for cases where multiple stores in the same loop iteration were combined to form a single pattern. For such cases, we inherit the existing limitation, whereby loops such as the following are supported: ``` for (unsigned i = 0; i < 2 * n; i += 2) { f[i] = 2; f[i+1] = 2; } ``` But the following doesn't result in a memset.pattern (even though it could, by forming an appropriate pattern): ``` for (unsigned i = 0; i < 2 * n; i += 2) { f[i] = 2; f[i+1] = 3; } ``` Addressing this existing deficiency is left for a follow-up due to a desire not to change too much at once (i.e. to target equivalence to the current codegen). A command line option is introduced to force the selection of the intrinsic even in cases where it otherwise wouldn't be (i.e. in cases where the libcall wouldn't have been selected). This is intended as a transitional option for testing and experimentation, to be removed at a later point. The only platforms this should impact are those that have the memset_pattern16 libcall (Apple platforms). The testing performed to check for no unexpected codegen changes is described here: https://github.com/llvm/llvm-project/pull/126736#issuecomment-3005097468
293 lines
13 KiB
LLVM
293 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s

; Runs the loop-idiom pass over loops that store constants into the fields of
; an array of structs. The CHECK lines expect a call to
; llvm.experimental.memset.pattern in bar1, bar2 and bar3, and expect the loops
; in bar4 and bar5 to keep their stores.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

target triple = "x86_64-apple-darwin10.0.0"

; Two i32 fields: used by bar1, bar2, bar3 and bar4.
%struct.foo = type { i32, i32 }
; Three i32 fields: used by bar5, which only writes the first two.
%struct.foo1 = type { i32, i32, i32 }

;void bar1(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 2;
;    f[i].b = 2;
;  }
;}

; Both i32 fields of each %struct.foo element are stored with the same
; constant 2, in ascending index order. The CHECK lines expect a single
; llvm.experimental.memset.pattern call in the preheader with pattern i32 2
; and a count of 2 * zext(n) (two pattern elements per iteration), and expect
; no store to remain in the loop body.
define void @bar1(ptr %f, i32 %n) nounwind ssp {
; CHECK-LABEL: @bar1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F]], i64 [[INDVARS_IV]], i32 0
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F]], i64 [[INDVARS_IV]], i32 1
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 0
  store i32 2, ptr %a, align 4
  %b = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 1
  store i32 2, ptr %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void bar2(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].b = 2;
;    f[i].a = 2;
;  }
;}
; Same loop as bar1, except that field b is stored before field a within each
; iteration. The CHECK lines expect the same outcome: one
; llvm.experimental.memset.pattern call in the preheader (pattern i32 2,
; count 2 * zext(n)) and no store left in the loop body.
define void @bar2(ptr %f, i32 %n) nounwind ssp {
; CHECK-LABEL: @bar2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F]], i64 [[INDVARS_IV]], i32 1
; CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F]], i64 [[INDVARS_IV]], i32 0
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %b = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 1
  store i32 2, ptr %b, align 4
  %a = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 0
  store i32 2, ptr %a, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void bar3(foo_t *f, unsigned n) {
;  for (unsigned i = n; i > 0; --i) {
;    f[i].a = 2;
;    f[i].b = 2;
;  }
;}
; Counting-down variant: the induction variable starts at zext(n) and is
; decremented each iteration; the loop exits once trunc(iv) - 1 equals 0.
; The CHECK lines expect one llvm.experimental.memset.pattern call in the
; preheader whose destination is %f advanced by 8*zext(n) - 8*zext(n-1) bytes,
; with pattern i32 2 and a count of 2 * zext(n), and expect no store to remain
; in the loop body.
define void @bar3(ptr nocapture %f, i32 %n) nounwind ssp {
; CHECK-LABEL: @bar3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP1]], [[TMP4]]
; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[F:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[UGLYGEP]], i32 2, i64 [[TMP7]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F]], i64 [[INDVARS_IV]], i32 0
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F]], i64 [[INDVARS_IV]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[TMP6]], -1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  %0 = zext i32 %n to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 0
  store i32 2, ptr %a, align 4
  %b = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 1
  store i32 2, ptr %b, align 4
  %1 = trunc i64 %indvars.iv to i32
  %dec = add i32 %1, -1
  %cmp = icmp eq i32 %dec, 0
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  br i1 %cmp, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void bar4(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 0;
;    f[i].b = 1;
;  }
;}
; Negative test: the two fields receive different constants (0 and 1).
; The CHECK lines expect the loop to be left as written: the preheader holds
; only its branch, and both stores stay in the loop body.
define void @bar4(ptr nocapture %f, i32 %n) nounwind ssp {
; CHECK-LABEL: @bar4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F:%.*]], i64 [[INDVARS_IV]], i32 0
; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F]], i64 [[INDVARS_IV]], i32 1
; CHECK-NEXT:    store i32 1, ptr [[B]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 0
  store i32 0, ptr %a, align 4
  %b = getelementptr inbounds %struct.foo, ptr %f, i64 %indvars.iv, i32 1
  store i32 1, ptr %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

;void bar5(foo1_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 1;
;    f[i].b = 1;
;  }
;}
; Negative test: %struct.foo1 has three i32 fields but the loop only stores to
; fields 0 and 1, so the third field of every element is never written.
; The CHECK lines expect the loop to be left as written: the preheader holds
; only its branch, and both stores stay in the loop body.
define void @bar5(ptr nocapture %f, i32 %n) nounwind ssp {
; CHECK-LABEL: @bar5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_FOO1:%.*]], ptr [[F:%.*]], i64 [[INDVARS_IV]], i32 0
; CHECK-NEXT:    store i32 1, ptr [[A]], align 4
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_FOO1]], ptr [[F]], i64 [[INDVARS_IV]], i32 1
; CHECK-NEXT:    store i32 1, ptr [[B]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo1, ptr %f, i64 %indvars.iv, i32 0
  store i32 1, ptr %a, align 4
  %b = getelementptr inbounds %struct.foo1, ptr %f, i64 %indvars.iv, i32 1
  store i32 1, ptr %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}
; Attribute groups expected in the printed module. The first group matches the
; `nounwind ssp` attributes written on the bar* definitions in this file.
; NOTE(review): the second group presumably belongs to the declaration of
; llvm.experimental.memset.pattern that the pass adds - confirm against the
; opt output.
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
;.