This patch changes the lowering of the [experimental.memset.pattern intrinsic](https://llvm.org/docs/LangRef.html#llvm-experimental-memset-pattern-intrinsic) to match the optimized memset and memcpy lowering when possible. (The tl;dr of memset.pattern is that it is like memset, except that you can use it to set values that are wider than a single byte.) The memset.pattern lowering now queries `TTI::getMemcpyLoopLoweringType` for a preferred memory access type. If the size of that type is a multiple of the size of the set value's type, and if both types have consistent store and alloc sizes (since memset.pattern behaves in a way that is not well suited to access widening if store and alloc sizes differ), the memset.pattern is lowered into two loops: a main loop that stores a sufficiently wide vector splat of the SetValue with the preferred memory access type and a residual loop that covers the remaining set values individually. In contrast to the memset lowering, this patch doesn't include a specialized lowering for residual loops with known constant lengths. Loops that are statically known to be unreachable will not be emitted. For backends that don't override `TTI::getMemcpyLoopLoweringType`, the generated code is mostly unchanged except for more consistent basic block names, no more `br i1 false` for memset.patterns with known size, and a flipped loop condition for memset.patterns with known size (see test changes). This is a follow-up to a similar patch for memset: #169040
128 lines
6.4 KiB
LLVM
128 lines
6.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -mtriple=riscv64 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
|
|
|
|
; i128 pattern with a compile-time count of 1.
; Expected lowering: the store loop is entered unconditionally (there is no
; zero-count guard), each iteration stores the whole i128 value at element
; index i (starting at 0) with align 1, and the loop exits once i + 1 is no
; longer unsigned-less-than 1.
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
|
|
; CHECK-LABEL: define void @memset_pattern_i128_1(
|
|
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: br label %[[LOADSTORELOOP:.*]]
|
|
; CHECK: [[LOADSTORELOOP]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
|
|
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
|
|
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
|
|
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT:.*]]
|
|
; CHECK: [[SPLIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
|
|
ret void
|
|
}
|
|
|
|
; i128 pattern with a compile-time count of 16.
; Expected lowering: same shape as the count-1 case (unconditional entry into
; a single store loop, one i128 store per iteration with align 1), but the
; loop continues while the incremented index is unsigned-less-than 16.
define void @memset_pattern_i128_16(ptr %a, i128 %value) nounwind {
|
|
; CHECK-LABEL: define void @memset_pattern_i128_16(
|
|
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: br label %[[LOADSTORELOOP:.*]]
|
|
; CHECK: [[LOADSTORELOOP]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
|
|
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
|
|
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 16
|
|
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT:.*]]
|
|
; CHECK: [[SPLIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 16, i1 0)
|
|
ret void
|
|
}
|
|
|
|
; i127 (non-power-of-two width) pattern with a runtime count %x.
; Expected lowering: an entry guard (icmp ne %x, 0) skips the loop when the
; count is zero; otherwise a single loop stores the value as i127 (not widened)
; at element index i with align 1 and repeats while i + 1 is
; unsigned-less-than %x.
define void @memset_pattern_i127_x(ptr %a, i127 %value, i64 %x) nounwind {
|
|
; CHECK-LABEL: define void @memset_pattern_i127_x(
|
|
; CHECK-SAME: ptr [[A:%.*]], i127 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOADSTORELOOP:.*]], label %[[SPLIT:.*]]
|
|
; CHECK: [[LOADSTORELOOP]]:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i127, ptr [[A]], i64 [[TMP3]]
|
|
; CHECK-NEXT: store i127 [[VALUE]], ptr [[TMP2]], align 1
|
|
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
|
|
; CHECK: [[SPLIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
tail call void @llvm.experimental.memset.pattern(ptr %a, i127 %value, i64 %x, i1 0)
|
|
ret void
|
|
}
|
|
|
|
; i128 pattern with a runtime count %x.
; Expected lowering: an entry guard (icmp ne %x, 0) bypasses the loop for a
; zero count; otherwise one loop stores the i128 value per iteration at
; element index i with align 1 and repeats while i + 1 is
; unsigned-less-than %x.
define void @memset_pattern_i128_x(ptr %a, i128 %value, i64 %x) nounwind {
|
|
; CHECK-LABEL: define void @memset_pattern_i128_x(
|
|
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOADSTORELOOP:.*]], label %[[SPLIT:.*]]
|
|
; CHECK: [[LOADSTORELOOP]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
|
|
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP4]], align 1
|
|
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
|
|
; CHECK: [[SPLIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
|
|
ret void
|
|
}
|
|
|
|
; i256 pattern with a runtime count %x.
; Expected lowering: identical in shape to the i128 runtime-count case, with
; the getelementptr and the store typed as i256: a zero-count entry guard
; followed by a single loop that stores one i256 value per iteration with
; align 1 while i + 1 is unsigned-less-than %x.
define void @memset_pattern_i256_x(ptr %a, i256 %value, i64 %x) nounwind {
|
|
; CHECK-LABEL: define void @memset_pattern_i256_x(
|
|
; CHECK-SAME: ptr [[A:%.*]], i256 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOADSTORELOOP:.*]], label %[[SPLIT:.*]]
|
|
; CHECK: [[LOADSTORELOOP]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i256, ptr [[A]], i64 [[TMP2]]
|
|
; CHECK-NEXT: store i256 [[VALUE]], ptr [[TMP4]], align 1
|
|
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
|
|
; CHECK: [[SPLIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
tail call void @llvm.experimental.memset.pattern(ptr %a, i256 %value, i64 %x, i1 0)
|
|
ret void
|
|
}
|
|
|
|
; The store alignment should be the common alignment of the destination
|
|
; pointer and the pattern stride (the value type's allocation size).
|
|
; Two back-to-back calls with an i15 pattern and the same runtime count %x,
; differing only in the destination's alignment attribute (align 1, then
; align 2).
; Expected lowering: two consecutive guarded store loops, each with its own
; icmp ne %x, 0 entry check. The first loop's i15 store carries align 1, the
; second loop's carries align 2, so the destination alignment of each call is
; reflected in its emitted store.
define void @memset_pattern_i15_x_alignment(ptr %a, i15 %value, i64 %x) nounwind {
|
|
; CHECK-LABEL: define void @memset_pattern_i15_x_alignment(
|
|
; CHECK-SAME: ptr [[A:%.*]], i15 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOADSTORELOOP:.*]], label %[[SPLIT:.*]]
|
|
; CHECK: [[LOADSTORELOOP]]:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP3]]
|
|
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP2]], align 1
|
|
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
|
|
; CHECK: [[SPLIT]]:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[X]], 0
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[LOADSTORELOOP2:.*]], label %[[SPLIT1:.*]]
|
|
; CHECK: [[LOADSTORELOOP2]]:
|
|
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP11]]
|
|
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP8]], align 2
|
|
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP11]], 1
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
|
|
; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
|
|
; CHECK: [[SPLIT1]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call void @llvm.experimental.memset.pattern(ptr align 1 %a, i15 %value, i64 %x, i1 0)
|
|
call void @llvm.experimental.memset.pattern(ptr align 2 %a, i15 %value, i64 %x, i1 0)
|
|
ret void
|
|
}
|