
When the memcpy size is zero, this transform is an elaborate no-op. That can lead to an infinite loop if the size is zero in a way that BasicAA understands, because BasicAA can still determine that dst and dst + src_size are MustAlias. I previously tried to mitigate this with the isZeroSize() check, but there are cases where InstSimplify does not understand that the size is zero while BasicAA does. As such, this change bites the bullet and adds an explicit isKnownNonZero() check to guard against no-op transforms. Fixes https://github.com/llvm/llvm-project/issues/98610.
59 lines
2.7 KiB
LLVM
59 lines
2.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s

; A memset of %dst followed by a memcpy into the start of the same (noalias)
; destination. %src_size carries range(i64 1, 42), so it is known to be
; non-zero. The CHECK lines expect the memset to be rewritten so that it starts
; at %dst + %src_size and covers only the remaining bytes
; (dst_size <= src_size ? 0 : dst_size - src_size), with the memcpy kept after
; it.
define void @test_memset_memcpy(ptr %src, i64 range(i64 1, 42) %src_size, ptr noalias %dst, i64 %dst_size, i8 %c) {
; CHECK-LABEL: @test_memset_memcpy(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]]
; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[SRC_SIZE]]
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.p0.i64(ptr %dst, i8 %c, i64 %dst_size, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %src_size, i1 false)
  ret void
}

; Struct types used as GEP source element types by
; @test_different_gep_source_elements.
%a = type { i64, i64, i64 }
%b = type { i32, i32, i32 }

; Three i64 zero stores addressed through GEPs that share the base %src but use
; different source element types (%b for the first, %a for the other two).
; The CHECK lines expect the stores to be merged into a single 20-byte memset
; that starts at %pb.
; NOTE(review): with %b = { i32, i32, i32 } and %a = { i64, i64, i64 } the
; stores presumably cover bytes [4,12), [8,16) and [16,24) of %src; the exact
; offsets depend on the data layout in effect.
define void @test_different_gep_source_elements(ptr %src) {
; CHECK-LABEL: @test_different_gep_source_elements(
; CHECK-NEXT:    [[PB:%.*]] = getelementptr [[B:%.*]], ptr [[SRC:%.*]], i64 0, i32 1
; CHECK-NEXT:    [[PA:%.*]] = getelementptr [[A:%.*]], ptr [[SRC]], i64 0, i32 1
; CHECK-NEXT:    [[PA2:%.*]] = getelementptr [[A]], ptr [[SRC]], i64 0, i32 2
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[PB]], i8 0, i64 20, i1 false)
; CHECK-NEXT:    ret void
;
  %pb = getelementptr %b, ptr %src, i64 0, i32 1
  store i64 0, ptr %pb
  %pa = getelementptr %a, ptr %src, i64 0, i32 1
  store i64 0, ptr %pa
  %pa2 = getelementptr %a, ptr %src, i64 0, i32 2
  store i64 0, ptr %pa2
  ret void
}

; GEPs over a scalable vector type with a non-constant outer index %idx.
; %g1 and %g2 select i8 elements 1 and 5 of the same <vscale x 16 x i8> slot,
; and %h1 / %h2 add a further 2 and 6 bytes, so the two i64 stores land 8 bytes
; apart (at byte offsets 3 and 11 from that slot) and are back to back.
; The CHECK lines expect them to be merged into one 16-byte memset at %h1.
define void @test_gep_of_vscale_non_const_gep(ptr %p, i64 %idx) {
; CHECK-LABEL: @test_gep_of_vscale_non_const_gep(
; CHECK-NEXT:    [[G1:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P:%.*]], i64 [[IDX:%.*]], i32 1
; CHECK-NEXT:    [[G2:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P]], i64 [[IDX]], i32 5
; CHECK-NEXT:    [[H1:%.*]] = getelementptr i8, ptr [[G1]], i64 2
; CHECK-NEXT:    [[H2:%.*]] = getelementptr i8, ptr [[G2]], i64 6
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[H1]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %g1 = getelementptr <vscale x 16 x i8>, ptr %p, i64 %idx, i32 1
  %g2 = getelementptr <vscale x 16 x i8>, ptr %p, i64 %idx, i32 5
  %h1 = getelementptr i8, ptr %g1, i64 2
  %h2 = getelementptr i8, ptr %g2, i64 6
  store i64 0, ptr %h1
  store i64 0, ptr %h2
  ret void
}

; Memory intrinsics called by the test input and matched by the CHECK lines.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)