Jameson Nash 96ab74bf17
[InstCombine] remove undef loads, such as memcpy from undef (#143958)
Extend `isAllocSiteRemovable` to check whether the ModRef info
indicates the alloca is only Ref or only Mod, and to remove it
accordingly. There seemed to be a surprising number of benchmarks with
this pattern that weren't getting optimized previously (due to
MemorySSA walk limits). Somewhat more existing tests than I'd like had
to be modified because they were simply doing exactly this pattern (and
thus relying on undef memory). Claude Code contributed the new tests
(and found an important typo that I'd made).

This implements the discussion in
https://github.com/llvm/llvm-project/pull/143782#discussion_r2142720376.
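
For illustration, a minimal C++ sketch (not the actual patch) of the kind of decision this enables, assuming an aggregated ModRefInfo has already been computed over every user of the allocation and ignoring volatile accesses and other constraints; the helper name accessesAreRemovable is hypothetical:

// Sketch only: decide removability from aggregated ModRef information.
#include "llvm/Support/ModRef.h"

using namespace llvm;

// Hypothetical helper: the allocation's accesses can be dropped when its
// users only ever read it (Ref) or only ever write it (Mod). Reads of
// never-written memory fold to undef (or zero for zero-initializing
// allocators like calloc), and writes to never-read memory are dead.
static bool accessesAreRemovable(ModRefInfo AggregateMRI) {
  // If some user writes the memory and another reads it back (ModRef),
  // the stored contents are observable and the allocation must be kept.
  return !(isModSet(AggregateMRI) && isRefSet(AggregateMRI));
}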
2025-06-20 10:32:31 -04:00

141 lines
4.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
declare noalias ptr @calloc(i32, i32) nounwind allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
declare void @free(ptr) allockind("free") "alloc-family"="malloc"
; Test load from uninitialized alloca - should be removed and replaced with undef
define i32 @test_load_uninitialized_alloca() {
; CHECK-LABEL: @test_load_uninitialized_alloca(
; CHECK-NEXT: ret i32 undef
;
%a = alloca i32
%v = load i32, ptr %a
ret i32 %v
}
; Test load from zero-initialized malloc - should be removed and replaced with zero
define i32 @test_load_zero_initialized_malloc() {
; CHECK-LABEL: @test_load_zero_initialized_malloc(
; CHECK-NEXT: ret i32 0
;
%a = call ptr @calloc(i32 1, i32 4)
%v = load i32, ptr %a
call void @free(ptr %a)
ret i32 %v
}
; Test memcpy from uninitialized source - should be removed
define void @test_memcpy_from_uninitialized_alloca(ptr %dest) {
; CHECK-LABEL: @test_memcpy_from_uninitialized_alloca(
; CHECK-NEXT: ret void
;
%src = alloca i32, align 1
call void @llvm.memcpy.p0.p0.i32(ptr %src, ptr %src, i32 4, i1 false)
call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 4, i1 false)
ret void
}
; Test memcpy from zeroed source - should transform to memset with zero
define void @test_memcpy_from_uninitialized_calloc(ptr %dest) {
; CHECK-LABEL: @test_memcpy_from_uninitialized_calloc(
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr noundef nonnull align 1 dereferenceable(16) [[DEST:%.*]], i8 0, i32 16, i1 false)
; CHECK-NEXT: ret void
;
%src = call ptr @calloc(i32 1, i32 16)
call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 16, i1 false)
call void @free(ptr %src)
ret void
}
; Test mixed read/write pattern - should not be removable due to write before read
define i32 @test_write_then_read_alloca() {
; CHECK-LABEL: @test_write_then_read_alloca(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i8 42, ptr [[A]], align 1
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%a = alloca i32
store i8 42, ptr %a
%v = load i32, ptr %a
ret i32 %v
}
; Test read then write pattern - should not be removable due to conflicting access
define void @test_read_then_write_alloca() {
; CHECK-LABEL: @test_read_then_write_alloca(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[V8:%.*]] = trunc i32 [[V]] to i8
; CHECK-NEXT: store i8 [[V8]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
%a = alloca i32
%v = load i32, ptr %a
%v8 = trunc i32 %v to i8
store i8 %v8, ptr %a
ret void
}
; Test load through GEP from uninitialized alloca
define i8 @test_load_gep_uninitialized_alloca() {
; CHECK-LABEL: @test_load_gep_uninitialized_alloca(
; CHECK-NEXT: ret i8 undef
;
%a = alloca [4 x i8]
%gep = getelementptr [4 x i8], ptr %a, i32 0, i32 2
%v = load i8, ptr %gep
ret i8 %v
}
; Test load through bitcast from uninitialized alloca
define i16 @test_load_bitcast_uninitialized_alloca() {
; CHECK-LABEL: @test_load_bitcast_uninitialized_alloca(
; CHECK-NEXT: ret i16 undef
;
%a = alloca i32
%bc = bitcast ptr %a to ptr
%v = load i16, ptr %bc
ret i16 %v
}
; Test memmove from zero-initialized malloc
define void @test_memmove_from_zero_initialized_malloc(ptr %dest) {
; CHECK-LABEL: @test_memmove_from_zero_initialized_malloc(
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr noundef nonnull align 1 dereferenceable(32) [[DEST:%.*]], i8 0, i32 32, i1 false)
; CHECK-NEXT: ret void
;
%src = call ptr @calloc(i32 8, i32 4)
call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 32, i1 false)
call void @free(ptr %src)
ret void
}
; Test multiple loads from same uninitialized alloca
define { i32, i32 } @test_multiple_loads_uninitialized_alloca() {
; CHECK-LABEL: @test_multiple_loads_uninitialized_alloca(
; CHECK-NEXT: ret { i32, i32 } undef
;
%a = alloca [2 x i32]
%gep1 = getelementptr [2 x i32], ptr %a, i32 0, i32 0
%gep2 = getelementptr [2 x i32], ptr %a, i32 0, i32 1
%v1 = load i32, ptr %gep1
%v2 = load i32, ptr %gep2
%ret = insertvalue { i32, i32 } { i32 undef, i32 poison }, i32 %v1, 0
%ret2 = insertvalue { i32, i32 } %ret, i32 %v2, 1
ret { i32, i32 } %ret2
}
; Test that volatile operations prevent removal
define i32 @test_volatile_load_prevents_removal() {
; CHECK-LABEL: @test_volatile_load_prevents_removal(
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[V:%.*]] = load volatile i32, ptr [[A]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%a = alloca i32
%v = load volatile i32, ptr %a
ret i32 %v
}