
; Allows memcpy-to-memcpy forwarding in cases where the second memcpy is larger,
; but the overread is known to be undef, by shrinking the memcpy size.
; Refs https://github.com/llvm/llvm-project/pull/140954, which laid some of the
; groundwork for this.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; 9-byte buffer type used for the stack temporaries (`%cpy_tmp`) in the tests
; below.
%buf = type [9 x i8]

; We can forward `memcpy` because the copied locations are the same.
; The first copy writes 7 bytes of %src into the temporary; the second copy
; reads 6 bytes starting at offset 1 of the temporary. The checks expect the
; second copy to become a memmove that reads from %src + 1 instead.
define void @forward_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[SRC_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We need to update the align value of the source of `memcpy` when forwarding.
; %src is passed with `align 4` to the first copy, and the second copy reads
; from offset 3 of the temporary. The checks expect the forwarded source
; (%src + 3) to carry `align 1`.
define void @forward_offset_align(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_align(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 3
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; We can change the align value to 2 when forwarding.
; %src is passed with `align 4` to the first copy, and the second copy reads
; from offset 2 of the temporary. The checks expect the forwarded source
; (%src + 2) to carry `align 2`, even though the original second copy used
; `align 1` for its source.
define void @forward_offset_align_2(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_align_2(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 2 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; If the copy destination can be used as the copy source, we don't need to
; create a GEP instruction.
; Here %dest is %src + 1 and the second copy reads from offset 1 of the
; temporary. The checks expect no new GEP and no remaining second copy: only
; the two original GEPs are left before the return.
define void @forward_offset_without_gep(ptr %src) {
; CHECK-LABEL: define void @forward_offset_without_gep(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  %dest = getelementptr inbounds i8, ptr %src, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We need to create a GEP instruction when forwarding.
; Here %dest is %src + 2 while the second copy reads from offset 1 of the
; temporary, so the existing %dest GEP cannot serve as the new source. The
; checks expect a fresh GEP (%src + 1) feeding a memmove into %dest.
define void @forward_offset_with_gep(ptr %src) {
; CHECK-LABEL: define void @forward_offset_with_gep(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  %dest = getelementptr inbounds i8, ptr %src, i64 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; Make sure we pass the right parameters when calling `memcpy`.
; Same shape as @forward_offset, but %dest is passed to @use afterwards. The
; checks expect a memmove of 6 bytes from %src + 1 into %dest, followed by the
; unchanged call to @use.
define void @forward_offset_memcpy(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_memcpy(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    call void @use(ptr [[DEST]])
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  call void @use(ptr %dest)
  ret void
}

; Make sure we pass the right parameters when calling `memcpy.inline`.
; Same shape as @forward_offset_memcpy, but the second copy is a
; `memcpy.inline`. The checks expect that call to be kept as `memcpy.inline`,
; still reading 6 bytes from offset 1 of the temporary.
define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_memcpy_inline(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    call void @use(ptr [[DEST]])
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  call void @use(ptr %dest)
  ret void
}

; We can forward `memcpy` by shrinking it to the size of the `memcpy` it
; depends on.
; The first copy writes 6 bytes into the temporary (offsets 0..5). The second
; copy reads 6 bytes starting at offset 1 (offsets 1..6), so its last byte was
; never written by the first copy. The checks expect a memmove of only 5 bytes
; from %src + 1.
define void @forward_oversize_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_oversize_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
; CHECK-NEXT:    [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 6, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We can forward `memcpy` because the write operations do not corrupt the
; location to be copied.
; Between the two copies, bytes 0 and 6 of %src are overwritten. The second
; copy reads 5 bytes starting at offset 1 of the temporary, which corresponds
; to bytes 1..5 of %src, so neither store touches the forwarded range. The
; checks expect a memmove of 5 bytes from %src + 1.
define void @forward_offset_and_store(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_and_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[SRC]], align 1
; CHECK-NEXT:    [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
; CHECK-NEXT:    store i8 1, ptr [[DEP_SRC_END]], align 1
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  store i8 1, ptr %src, align 1
  %src_end = getelementptr inbounds i8, ptr %src, i64 6
  store i8 1, ptr %src_end, align 1
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; We cannot forward `memcpy` because the write operation alters the location to
; be copied. Also, make sure we have removed the GEP instruction that was
; created temporarily.
; Between the two copies, byte 1 of %src is overwritten, and the second copy
; reads 5 bytes starting at offset 1 of the temporary (bytes 1..5 of %src).
; The checks expect the second memcpy to be left unchanged, with no extra GEP
; on %src beyond the one already present in the input.
define void @do_not_forward_offset_and_store(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @do_not_forward_offset_and_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[DEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    store i8 1, ptr [[DEP]], align 1
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %src_offset = getelementptr inbounds i8, ptr %src, i64 1
  store i8 1, ptr %src_offset, align 1
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; Make sure we don't crash when the copy source is a constant.
; The first copy reads 26 bytes from the global @buf into %p1; the memmove then
; reads 1 byte from %p1 + 10. The checks expect that memmove to become a memcpy
; whose source is the constant expression `getelementptr inbounds (i8, ptr
; @buf, i64 10)`.
@buf = external global [32 x i8]

define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
; CHECK-LABEL: define void @pr98675(
; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) {
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P1]], ptr @buf, i64 26, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 10
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P2]], ptr getelementptr inbounds (i8, ptr @buf, i64 10), i64 1, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.memcpy.p0.p0.i64(ptr %p1, ptr @buf, i64 26, i1 false)
  %gep = getelementptr i8, ptr %p1, i64 10
  call void @llvm.memmove.p0.p0.i64(ptr %p2, ptr %gep, i64 1, i1 false)
  ret void
}

; The first copy writes only byte 0 of the 2-byte temporary, while the second
; copy reads 1 byte at offset 1, which the first copy never wrote. The checks
; expect the second copy to be removed entirely, leaving the allocas, the first
; copy and the GEP.
define void @over_offset_cpy(ptr %src) {
; CHECK-LABEL: define void @over_offset_cpy(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[DST:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT:    ret void
;
  %tmp = alloca [2 x i8]
  %dst = alloca i8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
  %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)

  ret void
}

; External function that receives the copy destination in
; @forward_offset_memcpy and @forward_offset_memcpy_inline.
declare void @use(ptr)

; Memory-copy intrinsics called by the tests above.
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)