
In order to keep the change as incremental as possible, this only introduces the memset.pattern intrinsic in cases where memset_pattern16 would have been used. Future patches can enable it on targets that don't have the intrinsic, and select it in cases where the libcall isn't directly usable. As the memset.pattern intrinsic takes the number of times to store the pattern as an argument, unlike memset_pattern16, which takes the number of bytes to write, we no longer try to form an i128 pattern. Special care is taken for cases where multiple stores in the same loop iteration were combined to form a single pattern. For such cases, we inherit the existing limitation: loops such as the following are supported:
```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 2;
}
```
But the following doesn't result in a memset.pattern (even though it could, by forming an appropriate pattern):
```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 3;
}
```
Addressing this existing deficiency is left for a follow-up, due to a desire not to change too much at once (i.e. to target equivalence to the current codegen). A command line option is introduced to force the selection of the intrinsic even in cases where it otherwise wouldn't be selected (i.e. in cases where the libcall wouldn't have been selected). This is intended as a transitional option for testing and experimentation, to be removed at a later point. The only platforms this should impact are those that have the memset_pattern16 libcall (Apple platforms). Testing performed to check for no unexpected codegen changes is described here: https://github.com/llvm/llvm-project/pull/126736#issuecomment-3005097468
127 lines
5.5 KiB
LLVM
127 lines
5.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
|
|
; The run line below feeds this file through opt with only the loop-idiom
; pass enabled and verifies the textual IR output with FileCheck.
; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
|
|
; The pointer entry in the datalayout below is "p:64:64:64:32". Per the LLVM
; LangRef pointer specification (size:abi:pref:idx) the trailing 32 is the
; index width, which is presumably why the functions in this file index and
; size memory with i32 values -- confirm against the LangRef if this matters.
target datalayout = "e-p:64:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
|
|
|
|
|
; An Apple/Darwin triple is used for this test.
target triple = "x86_64-apple-darwin10.0.0"
|
|
|
|
;void test(int *f, unsigned n) {
|
|
; for (unsigned i = 0; i < 2 * n; i += 2) {
|
|
; f[i] = 0;
|
|
; f[i+1] = 0;
|
|
; }
|
|
;}
|
|
; @test: a loop that is unrolled by two and stores i32 0 to both f[i] and
; f[i+1] on every iteration, for i = 0, 2, 4, ... while i < 2 * n.
;
; The assertions below expect loop-idiom to replace the two stores with one
; llvm.memset call of byte value 0 placed in the loop preheader. The expected
; byte count is (((2 * n - 1) >> 1) << 3) + 8, i.e. 8 bytes (two i32 stores)
; per iteration. The remaining loop skeleton (phi, address computations,
; compare and branch) is still expected in the output; only the stores vanish.
define void @test(ptr %f, i32 %n) nounwind ssp {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[N:%.*]], 1
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 8
|
|
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[F:%.*]], i8 0, i32 [[TMP4]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i32 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i32 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[F]], i32 [[TMP5]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INDVARS_IV_NEXT]], [[TMP0]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; %0 = 2 * n is the loop bound; the loop is skipped entirely when it is zero.
entry:
|
|
%0 = shl i32 %n, 1
|
|
%cmp1 = icmp eq i32 %0, 0
|
|
br i1 %cmp1, label %for.end, label %for.body.preheader
|
|
|
|
; Empty preheader in the input; the expected memset call is inserted here.
for.body.preheader: ; preds = %entry
|
|
br label %for.body
|
|
|
|
; Loop body: %indvars.iv starts at 0 and advances by 2 each iteration.
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, ptr %f, i32 %indvars.iv
|
|
; f[i] = 0
store i32 0, ptr %arrayidx, align 4
|
|
; The induction variable is always even here, so "or disjoint ..., 1" is i + 1.
%1 = or disjoint i32 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %f, i32 %1
|
|
; f[i+1] = 0
store i32 0, ptr %arrayidx2, align 4
|
|
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 2
|
|
; Unsigned comparison of the next index against 2 * n.
%cmp = icmp ult i32 %indvars.iv.next, %0
|
|
br i1 %cmp, label %for.body, label %for.end.loopexit
|
|
|
|
for.end.loopexit: ; preds = %for.body
|
|
br label %for.end
|
|
|
|
for.end: ; preds = %for.end.loopexit, %entry
|
|
ret void
|
|
}
|
|
|
|
;void test_pattern(int *f, unsigned n) {
|
|
; for (unsigned i = 0; i < 2 * n; i += 2) {
|
|
; f[i] = 2;
|
|
; f[i+1] = 2;
|
|
; }
|
|
;}
|
|
; @test_pattern: same loop shape as a two-way unrolled fill, but both stores
; write the non-zero value i32 2 to f[i] and f[i+1], for i = 0, 2, 4, ...
; while i < 2 * n.
;
; The assertions below expect loop-idiom to replace the two stores with one
; llvm.experimental.memset.pattern call in the loop preheader, using i32 2 as
; the pattern. The expected count operand is (((2 * n - 1) >> 1) + 1) * 2,
; i.e. two pattern stores per loop iteration. The remaining loop skeleton is
; still expected in the output; only the stores vanish.
define void @test_pattern(ptr %f, i32 %n) nounwind ssp {
|
|
; CHECK-LABEL: @test_pattern(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[N:%.*]], 1
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[MUL]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[MUL]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2
|
|
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr align 4 [[F:%.*]], i32 2, i32 [[TMP3]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i32 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[X1:%.*]] = or disjoint i32 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[F]], i32 [[X1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INDVARS_IV_NEXT]], [[MUL]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; %mul = 2 * n is the loop bound; the loop is skipped entirely when it is zero.
entry:
|
|
%mul = shl i32 %n, 1
|
|
%cmp1 = icmp eq i32 %mul, 0
|
|
br i1 %cmp1, label %for.end, label %for.body.preheader
|
|
|
|
; Empty preheader in the input; the expected memset.pattern call is inserted here.
for.body.preheader: ; preds = %entry
|
|
br label %for.body
|
|
|
|
; Loop body: %indvars.iv starts at 0 and advances by 2 each iteration.
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, ptr %f, i32 %indvars.iv
|
|
; f[i] = 2
store i32 2, ptr %arrayidx, align 4
|
|
; The induction variable is always even here, so "or disjoint ..., 1" is i + 1.
%x1 = or disjoint i32 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %f, i32 %x1
|
|
; f[i+1] = 2 (same value as the first store, so one i32 pattern suffices)
store i32 2, ptr %arrayidx2, align 4
|
|
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 2
|
|
; Unsigned comparison of the next index against 2 * n.
%cmp = icmp ult i32 %indvars.iv.next, %mul
|
|
br i1 %cmp, label %for.body, label %for.end.loopexit
|
|
|
|
for.end.loopexit: ; preds = %for.body
|
|
br label %for.end
|
|
|
|
for.end: ; preds = %for.end.loopexit, %entry
|
|
ret void
|
|
}
|
|
;.
|
|
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
|
|
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
|
;.
|