[SROA] Limit the number of allowed slices when trying to split allocas

This patch adds a hidden CLI option "--sroa-max-alloca-slices", which is
an integer that controls the maximum number of alloca slices SROA can
consider before bailing out. This is useful because it may not be
profitable to split memcpys into (possibly tens of) thousands of loads/stores.
This also prevents an issue with exponential compile time explosion in passes
like DSE and MemCpyOpt caused by excessive alloca splitting.

Fixes https://github.com/rust-lang/rust/issues/88580.

Differential Revision: https://reviews.llvm.org/D159354
This commit is contained in:
Dhruv Chawla 2023-09-01 18:07:46 +05:30
parent f8074942c5
commit e13e808283
No known key found for this signature in database
GPG Key ID: B2CCB71C873FCFF4
2 changed files with 185 additions and 0 deletions

View File

@ -121,6 +121,14 @@ static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
/// Disable running mem2reg during SROA in order to test or debug SROA.
static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
cl::Hidden);
/// The maximum number of alloca slices allowed when splitting.
static cl::opt<int>
SROAMaxAllocaSlices("sroa-max-alloca-slices", cl::init(1024),
cl::desc("Maximum number of alloca slices allowed "
"after which splitting is not attempted"),
cl::Hidden);
namespace {
/// Calculate the fragment of a variable to use when slicing a store
@ -4961,6 +4969,9 @@ SROAPass::runOnAlloca(AllocaInst &AI) {
if (AS.isEscaped())
return {Changed, CFGChanged};
if (std::distance(AS.begin(), AS.end()) > SROAMaxAllocaSlices)
return {Changed, CFGChanged};
// Delete all the dead users of this alloca before splitting and rewriting it.
for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.

View File

@ -0,0 +1,174 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes='loop-unroll,sroa' --sroa-max-alloca-slices=16 -S -o - < %s | FileCheck %s
; (Very) Reduced from this Rust code:
;
; extern "C" {
; fn _use(x: [[[Option::<usize>; 5]; 5]; 5]) -> bool;
; }
; fn main() {
; let s = [[[Option::<usize>::None; 5]; 5]; 5];
; unsafe {
; _use(s);
; }
; }
define void @huge_size() {
; CHECK-LABEL: define void @huge_size() {
; CHECK-NEXT: start:
; CHECK-NEXT: [[ARRAY:%.*]] = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8
; CHECK-NEXT: [[ARRAY_SUB_1:%.*]] = alloca [5 x [5 x { i64, i64 }]], align 8
; CHECK-NEXT: br label [[LOOP_1:%.*]]
; CHECK: loop.1:
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.2:
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_1]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 1
; CHECK-NEXT: store i64 0, ptr [[GEP_2_1]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_2:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 2
; CHECK-NEXT: store i64 0, ptr [[GEP_2_2]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_3:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 3
; CHECK-NEXT: store i64 0, ptr [[GEP_2_3]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_4:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 4
; CHECK-NEXT: store i64 0, ptr [[GEP_2_4]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.3:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ARRAY]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_1:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_1]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_2:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 2
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_2]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_3:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 3
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_3]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_4:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 4
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_4]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: ret void
;
start:
%array = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8
%array.sub.1 = alloca [5 x [5 x { i64, i64 }]], align 8
%array.sub.2 = alloca [5 x { i64, i64 }], align 8
br label %loop.1
; Set up %array.sub.2
loop.1:
%ind.1 = phi i64 [ 0, %start ], [ %ind.1.next, %loop.1 ]
%gep.1 = getelementptr [5 x { i64, i64 }], ptr %array.sub.2, i64 0, i64 %ind.1
store i64 0, ptr %gep.1, align 8
%ind.1.next = add i64 %ind.1, 1
%loop1.cond = icmp ult i64 %ind.1.next, 5
br i1 %loop1.cond, label %loop.1, label %loop.2
; Set up %array.sub.1
loop.2:
%ind.2 = phi i64 [ 0, %loop.1 ], [ %ind.2.next, %loop.2 ]
%gep.2 = getelementptr [5 x [5 x { i64, i64 }]], ptr %array.sub.1, i64 0, i64 %ind.2
call void @llvm.memcpy.p0.p0.i64(ptr %gep.2, ptr %array.sub.2, i64 160, i1 false)
%ind.2.next = add i64 %ind.2, 1
%loop.2.cond = icmp ult i64 %ind.2.next, 5
br i1 %loop.2.cond, label %loop.2, label %loop.3
; Set up %array
loop.3:
%ind.3 = phi i64 [ 0, %loop.2 ], [ %ind.3.next, %loop.3 ]
%gep.3 = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr %array, i64 0, i64 %ind.3
call void @llvm.memcpy.p0.p0.i64(ptr %gep.3, ptr %array.sub.1, i64 1600, i1 false)
%ind.3.next = add i64 %ind.3, 1
%loop.3.cond = icmp ult i64 %ind.3.next, 5
br i1 %loop.3.cond, label %loop.3, label %exit
exit:
ret void
}
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)