
This aims to reduce the divergence between the initial checks in this function and processMemCpyMemCpyDependence (in particular, by adding handling of offsets), with the goal of eventually reducing the duplication there and improving this pass in other ways.
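
As a rough illustration (a hypothetical sketch, not part of this patch or the test below) of the kind of memcpy-memcpy dependence with an offset that the offset handling is concerned with: the second copy reads the first copy's destination at an offset, so forwarding it to read from the original source has to apply that same offset.

```llvm
; Hypothetical example for illustration only: %tmp is filled from %src by the
; first memcpy, and the second memcpy reads %tmp at a 4-byte offset. Forwarding
; the second copy to read directly from %src must account for that offset.
define void @forward_with_offset(ptr %dst, ptr %src) {
  %tmp = alloca [16 x i8]
  call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 16, i1 false)
  %tmp.off = getelementptr inbounds i8, ptr %tmp, i64 4
  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %tmp.off, i64 8, i1 false)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
```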
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s
; Handle memcpy-memcpy dependencies of differing sizes correctly.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Don't delete the second memcpy, even though there's an earlier
; memcpy with a larger size from the same address.

define i32 @foo(i1 %z) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
; CHECK-NEXT:    [[S:%.*]] = alloca [10 x i32], align 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr nonnull align 16 [[S]], i8 0, i64 40, i1 false)
; CHECK-NEXT:    store i32 1, ptr [[A]], align 4
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr [10 x i32], ptr [[S]], i64 0, i64 1
; CHECK-NEXT:    br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]]
; CHECK:       for.body3.lr.ph:
; CHECK-NEXT:    br label [[FOR_INC7_1]]
; CHECK:       for.inc7.1:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 4, i1 false)
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[TMP2]]
;
entry:
  %a = alloca [10 x i32]
  %s = alloca [10 x i32]
  call void @llvm.memset.p0.i64(ptr nonnull align 16 %s, i8 0, i64 40, i1 false)
  store i32 1, ptr %a
  %scevgep = getelementptr [10 x i32], ptr %s, i64 0, i64 1
  br i1 %z, label %for.body3.lr.ph, label %for.inc7.1

for.body3.lr.ph:                                  ; preds = %entry
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %scevgep, i64 17179869180, i1 false)
  br label %for.inc7.1

for.inc7.1:
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %scevgep, i64 4, i1 false)
  %0 = load i32, ptr %a
  ret i32 %0
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)