Nikita Popov 71051deff2
[MemCpyOpt] Fix infinite loop in memset+memcpy fold (#98638)
For the case where the memcpy size is zero, this transform is a complex
no-op. This can lead to an infinite loop when the size is zero in a way
that BasicAA understands, because it can still understand that dst and
dst + src_size are MustAlias.

I previously tried to mitigate this with the isZeroSize() check, but we
can still hit cases where BasicAA understands that the size is zero
while InstSimplify does not.

As such, this bites the bullet and adds an explicit isKnownNonZero()
check to guard against no-op transforms.

Fixes https://github.com/llvm/llvm-project/issues/98610.
2024-07-15 09:41:11 +02:00

59 lines
2.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s
; A memset of %dst_size bytes at %dst followed by a memcpy of %src_size bytes
; into the same %dst. %src_size carries range(i64 1, 42), so zero is excluded
; from its possible values; %dst is noalias.
; The assertions below expect the memset to be rewritten so that it starts at
; %dst + %src_size and covers (dst_size <= src_size ? 0 : dst_size - src_size)
; bytes, i.e. only the tail that the memcpy does not overwrite, while the
; memcpy itself is left unchanged.
define void @test_memset_memcpy(ptr %src, i64 range(i64 1, 42) %src_size, ptr noalias %dst, i64 %dst_size, i8 %c) {
; CHECK-LABEL: @test_memset_memcpy(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]]
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[SRC_SIZE]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false)
; CHECK-NEXT: ret void
;
call void @llvm.memset.p0.i64(ptr %dst, i8 %c, i64 %dst_size, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %src_size, i1 false)
ret void
}
; Two struct types with the same field count but different field widths, used
; as GEP source element types on the same base pointer in the test below.
%a = type { i64, i64, i64 }
%b = type { i32, i32, i32 }
; Three zero stores of i64 whose addresses are computed from the same %src via
; GEPs with different source element types (%b for the first, %a for the
; other two). Assuming the default data layout (none is specified in the
; visible part of this file), the stores cover bytes [4,12), [8,16) and
; [16,24) of %src, which together form one contiguous 20-byte range starting
; at %pb. The assertions below expect the three stores to be replaced by a
; single 20-byte memset at %pb, with the GEPs themselves left in place.
define void @test_different_gep_source_elements(ptr %src) {
; CHECK-LABEL: @test_different_gep_source_elements(
; CHECK-NEXT: [[PB:%.*]] = getelementptr [[B:%.*]], ptr [[SRC:%.*]], i64 0, i32 1
; CHECK-NEXT: [[PA:%.*]] = getelementptr [[A:%.*]], ptr [[SRC]], i64 0, i32 1
; CHECK-NEXT: [[PA2:%.*]] = getelementptr [[A]], ptr [[SRC]], i64 0, i32 2
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[PB]], i8 0, i64 20, i1 false)
; CHECK-NEXT: ret void
;
%pb = getelementptr %b, ptr %src, i64 0, i32 1
store i64 0, ptr %pb
%pa = getelementptr %a, ptr %src, i64 0, i32 1
store i64 0, ptr %pa
%pa2 = getelementptr %a, ptr %src, i64 0, i32 2
store i64 0, ptr %pa2
ret void
}
; Two zero stores of i64 whose addresses share a variable base: both %g1 and
; %g2 index <vscale x 16 x i8> with the same non-constant %idx and differ only
; in the trailing constant element index (1 vs 5). The following i8 GEPs add 2
; and 6 bytes, so %h2 is %h1 + 8 and the two 8-byte stores are adjacent.
; The assertions below expect both stores to be replaced by a single 16-byte
; memset at %h1, with all four GEPs left in place.
define void @test_gep_of_vscale_non_const_gep(ptr %p, i64 %idx) {
; CHECK-LABEL: @test_gep_of_vscale_non_const_gep(
; CHECK-NEXT: [[G1:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P:%.*]], i64 [[IDX:%.*]], i32 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P]], i64 [[IDX]], i32 5
; CHECK-NEXT: [[H1:%.*]] = getelementptr i8, ptr [[G1]], i64 2
; CHECK-NEXT: [[H2:%.*]] = getelementptr i8, ptr [[G2]], i64 6
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[H1]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%g1 = getelementptr <vscale x 16 x i8>, ptr %p, i64 %idx, i32 1
%g2 = getelementptr <vscale x 16 x i8>, ptr %p, i64 %idx, i32 5
%h1 = getelementptr i8, ptr %g1, i64 2
%h2 = getelementptr i8, ptr %g2, i64 6
store i64 0, ptr %h1
store i64 0, ptr %h2
ret void
}
; Declarations of the memset and memcpy intrinsics called by the tests above.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)