
Extend `isAllocSiteRemovable` to be able to check if the ModRef info indicates the alloca is only Ref or only Mod, and be able to remove it accordingly. It seemed that there were a surprising number of benchmarks with this pattern which weren't getting optimized previously (due to MemorySSA walk limits). There were somewhat more existing tests than I'd like to have modified which were simply doing exactly this pattern (and thus relying on undef memory). Claude code contributed the new tests (and found an important typo that I'd made). This implements the discussion in https://github.com/llvm/llvm-project/pull/143782#discussion_r2142720376.
141 lines
4.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Allocation-family declarations used by the tests below: a zero-initializing
; allocator and its matching deallocator, both in the "malloc" alloc-family.
declare noalias ptr @calloc(i32, i32) nounwind allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
declare void @free(ptr) allockind("free") "alloc-family"="malloc"
|
; Test load from uninitialized alloca - should be removed and replaced with undef
define i32 @test_load_uninitialized_alloca() {
; CHECK-LABEL: @test_load_uninitialized_alloca(
; CHECK-NEXT: ret i32 undef
;
  %a = alloca i32
  %v = load i32, ptr %a
  ret i32 %v
}
|
|
|
|
; Test load from zero-initialized allocation (calloc) - should be removed and
; replaced with zero
define i32 @test_load_zero_initialized_malloc() {
; CHECK-LABEL: @test_load_zero_initialized_malloc(
; CHECK-NEXT: ret i32 0
;
  %a = call ptr @calloc(i32 1, i32 4)
  %v = load i32, ptr %a
  call void @free(ptr %a)
  ret i32 %v
}
|
|
|
|
; Test memcpy from uninitialized source - should be removed
define void @test_memcpy_from_uninitialized_alloca(ptr %dest) {
; CHECK-LABEL: @test_memcpy_from_uninitialized_alloca(
; CHECK-NEXT: ret void
;
  %src = alloca i32, align 1
  ; NOTE(review): self-copy (dst == src) — presumably adds an extra user of the
  ; alloca without initializing it; confirm against the pass being tested.
  call void @llvm.memcpy.p0.p0.i32(ptr %src, ptr %src, i32 4, i1 false)
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 4, i1 false)
  ret void
}
|
|
|
|
; Test memcpy from zeroed source - should transform to memset with zero
; (function name says "uninitialized" but the source is calloc-zeroed)
define void @test_memcpy_from_uninitialized_calloc(ptr %dest) {
; CHECK-LABEL: @test_memcpy_from_uninitialized_calloc(
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr noundef nonnull align 1 dereferenceable(16) [[DEST:%.*]], i8 0, i32 16, i1 false)
; CHECK-NEXT: ret void
;
  %src = call ptr @calloc(i32 1, i32 16)
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 16, i1 false)
  call void @free(ptr %src)
  ret void
}
|
|
|
|
; Test mixed read/write pattern - should not be removable due to write before read
define i32 @test_write_then_read_alloca() {
; CHECK-LABEL: @test_write_then_read_alloca(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i8 42, ptr [[A]], align 1
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
  %a = alloca i32
  store i8 42, ptr %a
  %v = load i32, ptr %a
  ret i32 %v
}
|
|
|
|
; Test read then write pattern - should not be removable due to conflicting access
define void @test_read_then_write_alloca() {
; CHECK-LABEL: @test_read_then_write_alloca(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[V8:%.*]] = trunc i32 [[V]] to i8
; CHECK-NEXT: store i8 [[V8]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
  %a = alloca i32
  %v = load i32, ptr %a
  %v8 = trunc i32 %v to i8
  store i8 %v8, ptr %a
  ret void
}
|
|
|
|
; Test load through GEP from uninitialized alloca
define i8 @test_load_gep_uninitialized_alloca() {
; CHECK-LABEL: @test_load_gep_uninitialized_alloca(
; CHECK-NEXT: ret i8 undef
;
  %a = alloca [4 x i8]
  %gep = getelementptr [4 x i8], ptr %a, i32 0, i32 2
  %v = load i8, ptr %gep
  ret i8 %v
}
|
|
|
|
; Test load through bitcast from uninitialized alloca
define i16 @test_load_bitcast_uninitialized_alloca() {
; CHECK-LABEL: @test_load_bitcast_uninitialized_alloca(
; CHECK-NEXT: ret i16 undef
;
  %a = alloca i32
  ; ptr-to-ptr bitcast is a no-op with opaque pointers; kept to exercise the
  ; use-walk through a cast user
  %bc = bitcast ptr %a to ptr
  %v = load i16, ptr %bc
  ret i16 %v
}
|
|
|
|
; Test memmove from zero-initialized malloc
define void @test_memmove_from_zero_initialized_malloc(ptr %dest) {
; CHECK-LABEL: @test_memmove_from_zero_initialized_malloc(
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr noundef nonnull align 1 dereferenceable(32) [[DEST:%.*]], i8 0, i32 32, i1 false)
; CHECK-NEXT: ret void
;
  %src = call ptr @calloc(i32 8, i32 4)
  call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 32, i1 false)
  call void @free(ptr %src)
  ret void
}
|
|
|
|
; Test multiple loads from same uninitialized alloca
define { i32, i32 } @test_multiple_loads_uninitialized_alloca() {
; CHECK-LABEL: @test_multiple_loads_uninitialized_alloca(
; CHECK-NEXT: ret { i32, i32 } undef
;
  %a = alloca [2 x i32]
  %gep1 = getelementptr [2 x i32], ptr %a, i32 0, i32 0
  %gep2 = getelementptr [2 x i32], ptr %a, i32 0, i32 1
  %v1 = load i32, ptr %gep1
  %v2 = load i32, ptr %gep2
  %ret = insertvalue { i32, i32 } { i32 undef, i32 poison }, i32 %v1, 0
  %ret2 = insertvalue { i32, i32 } %ret, i32 %v2, 1
  ret { i32, i32 } %ret2
}
|
|
|
|
; Test that volatile operations prevent removal
define i32 @test_volatile_load_prevents_removal() {
; CHECK-LABEL: @test_volatile_load_prevents_removal(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[V:%.*]] = load volatile i32, ptr [[A]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
  %a = alloca i32
  %v = load volatile i32, ptr %a
  ret i32 %v
}