Jameson Nash 7460c700ae
[MemCpyOpt] handle memcpy from memset in more cases (#140954)
This aims to reduce the divergence between the initial checks in this
function and processMemCpyMemCpyDependence (in particular, adding
handling of offsets), with the goal to eventually reduce duplication
there and improve this pass in other ways.
2025-06-11 10:42:05 +02:00

46 lines
1.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s
; Handle memcpy-memcpy dependencies of differing sizes correctly.
; Data layout: little-endian ("e"), ELF name mangling ("m:e"), i64 aligned to
; 64 bits, f80 aligned to 128 bits, native integer widths of 8/16/32/64 bits,
; and a 128-bit natural stack alignment ("S128").
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
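; The 4-byte memcpy in %for.inc7.1 must not be dropped as redundant, even though
; there's an earlier memcpy with a larger size from the same address on one path.
; Its source lies in the zero-filled %s, so it may only become an equivalent memset.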
; @foo: copies from a zero-filled stack array into %a with two memcpys of
; different sizes (one on a conditional path, one on the join block) and
; returns the first i32 of %a.
define i32 @foo(i1 %z) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
; CHECK-NEXT: [[S:%.*]] = alloca [10 x i32], align 4
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr nonnull align 16 [[S]], i8 0, i64 40, i1 false)
; CHECK-NEXT: store i32 1, ptr [[A]], align 4
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], ptr [[S]], i64 0, i64 1
; CHECK-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]]
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: br label [[FOR_INC7_1]]
; CHECK: for.inc7.1:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 4, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: ret i32 [[TMP2]]
;
entry:
; Two 10 x i32 stack arrays: %a is the copy destination, %s the copy source.
%a = alloca [10 x i32]
%s = alloca [10 x i32]
; Zero-fill all 40 bytes of %s.
call void @llvm.memset.p0.i64(ptr nonnull align 16 %s, i8 0, i64 40, i1 false)
; Give the first i32 of %a a non-zero value.
store i32 1, ptr %a
; %scevgep addresses element 1 of %s; both memcpys below read from here.
%scevgep = getelementptr [10 x i32], ptr %s, i64 0, i64 1
br i1 %z, label %for.body3.lr.ph, label %for.inc7.1
for.body3.lr.ph: ; preds = %entry
; The larger memcpy (17179869180 bytes), executed only when %z is true.
; The expected output above has no call left in this block.
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %scevgep, i64 17179869180, i1 false)
br label %for.inc7.1
for.inc7.1:
; The smaller, 4-byte memcpy from the same source address, executed on both
; paths. The expected output has a 4-byte zero memset of %a in its place.
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %scevgep, i64 4, i1 false)
; Return the first i32 of %a.
%0 = load i32, ptr %a
ret i32 %0
}
; Declarations of the memcpy and memset intrinsics called by @foo.
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)