llvm-project/llvm/test/CodeGen/X86/memset-sse-stack-realignment.ll
Haohai Wen 21f23a37c6 [SelectionDAG] Clamp stack alignment for memset, memmove
memcpy lowering already clamps the dst stack alignment to
NaturalStackAlignment if hasStackRealignment is false. We should also
clamp the stack alignment for memset and memmove. If we don't clamp,
SelectionDAG may first do tail call optimization, which requires no stack
realignment. Then a memmove or memset in the same function may be lowered
to loads/stores with a larger alignment, causing PEI to emit stack
realignment code, which is incorrect.

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D136456
2022-10-26 16:45:31 +08:00

162 lines
4.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Make sure that we realign the stack when memset is lowered to vector stores.
; Mingw32 uses a 4-byte stack alignment, but we need 16 bytes for SSE stores and
; 32 bytes for AVX stores.
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s --check-prefix=NOSSE
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX
; test1: zero a 32-byte stack buffer that is only 4-byte aligned in the IR via
; llvm.memset, then pass a variable-sized alloca to @dummy.
; What the autogenerated assertions below expect:
;   * pentium2 run (no vector stores): eight 4-byte stores of zero addressed
;     off %ebp; %esp is never masked, so the stack is not realigned.
;   * pentium3 / yonah runs: %esp is masked with -16 and the buffer is cleared
;     with two movaps stores of a zeroed %xmm0, addressed through %esi.
;   * corei7-avx / core-avx2 runs: %esp is masked with -32 and the buffer is
;     cleared with a single vmovaps store of %ymm0, addressed through %esi;
;     a vzeroupper is expected before the call to _dummy.
define void @test1(i32 %t) nounwind {
; NOSSE-LABEL: test1:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: movl %esp, %ebp
; NOSSE-NEXT: subl $32, %esp
; NOSSE-NEXT: movl 8(%ebp), %eax
; NOSSE-NEXT: movl $0, -4(%ebp)
; NOSSE-NEXT: movl $0, -8(%ebp)
; NOSSE-NEXT: movl $0, -12(%ebp)
; NOSSE-NEXT: movl $0, -16(%ebp)
; NOSSE-NEXT: movl $0, -20(%ebp)
; NOSSE-NEXT: movl $0, -24(%ebp)
; NOSSE-NEXT: movl $0, -28(%ebp)
; NOSSE-NEXT: movl $0, -32(%ebp)
; NOSSE-NEXT: addl $3, %eax
; NOSSE-NEXT: andl $-4, %eax
; NOSSE-NEXT: calll __alloca
; NOSSE-NEXT: movl %esp, %eax
; NOSSE-NEXT: pushl %eax
; NOSSE-NEXT: calll _dummy
; NOSSE-NEXT: movl %ebp, %esp
; NOSSE-NEXT: popl %ebp
; NOSSE-NEXT: retl
;
; SSE-LABEL: test1:
; SSE: # %bb.0:
; SSE-NEXT: pushl %ebp
; SSE-NEXT: movl %esp, %ebp
; SSE-NEXT: pushl %esi
; SSE-NEXT: andl $-16, %esp
; SSE-NEXT: subl $48, %esp
; SSE-NEXT: movl %esp, %esi
; SSE-NEXT: movl 8(%ebp), %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, 16(%esi)
; SSE-NEXT: movaps %xmm0, (%esi)
; SSE-NEXT: addl $3, %eax
; SSE-NEXT: andl $-4, %eax
; SSE-NEXT: calll __alloca
; SSE-NEXT: movl %esp, %eax
; SSE-NEXT: pushl %eax
; SSE-NEXT: calll _dummy
; SSE-NEXT: leal -4(%ebp), %esp
; SSE-NEXT: popl %esi
; SSE-NEXT: popl %ebp
; SSE-NEXT: retl
;
; AVX-LABEL: test1:
; AVX: # %bb.0:
; AVX-NEXT: pushl %ebp
; AVX-NEXT: movl %esp, %ebp
; AVX-NEXT: pushl %esi
; AVX-NEXT: andl $-32, %esp
; AVX-NEXT: subl $64, %esp
; AVX-NEXT: movl %esp, %esi
; AVX-NEXT: movl 8(%ebp), %eax
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm0, (%esi)
; AVX-NEXT: addl $3, %eax
; AVX-NEXT: andl $-4, %eax
; AVX-NEXT: calll __alloca
; AVX-NEXT: movl %esp, %eax
; AVX-NEXT: pushl %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: calll _dummy
; AVX-NEXT: leal -4(%ebp), %esp
; AVX-NEXT: popl %esi
; AVX-NEXT: popl %ebp
; AVX-NEXT: retl
; 32-byte buffer declared with only 4-byte alignment; the memset below zeroes
; all 32 bytes of it.
%tmp1210 = alloca i8, i32 32, align 4
call void @llvm.memset.p0.i64(ptr align 4 %tmp1210, i8 0, i64 32, i1 false)
; Variable-sized alloca: appears as the __alloca call in every expected output.
%x = alloca i8, i32 %t
call void @dummy(ptr %x)
ret void
}
; test2: same shape as a 16-byte memset case — zero a 16-byte stack buffer that
; is only 4-byte aligned in the IR, then pass a variable-sized alloca to @dummy.
; What the autogenerated assertions below expect:
;   * pentium2 run (no vector stores): four 4-byte stores of zero addressed
;     off %ebp; %esp is never masked, so the stack is not realigned.
;   * pentium3 / yonah runs: %esp is masked with -16 and the buffer is cleared
;     with one movaps store of a zeroed %xmm0, addressed through %esi.
;   * corei7-avx / core-avx2 runs: %esp is also masked with -16 (not -32) and
;     the buffer is cleared with one vmovaps store of %xmm0; no vzeroupper is
;     expected.
define void @test2(i32 %t) nounwind {
; NOSSE-LABEL: test2:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: movl %esp, %ebp
; NOSSE-NEXT: subl $16, %esp
; NOSSE-NEXT: movl 8(%ebp), %eax
; NOSSE-NEXT: movl $0, -4(%ebp)
; NOSSE-NEXT: movl $0, -8(%ebp)
; NOSSE-NEXT: movl $0, -12(%ebp)
; NOSSE-NEXT: movl $0, -16(%ebp)
; NOSSE-NEXT: addl $3, %eax
; NOSSE-NEXT: andl $-4, %eax
; NOSSE-NEXT: calll __alloca
; NOSSE-NEXT: movl %esp, %eax
; NOSSE-NEXT: pushl %eax
; NOSSE-NEXT: calll _dummy
; NOSSE-NEXT: movl %ebp, %esp
; NOSSE-NEXT: popl %ebp
; NOSSE-NEXT: retl
;
; SSE-LABEL: test2:
; SSE: # %bb.0:
; SSE-NEXT: pushl %ebp
; SSE-NEXT: movl %esp, %ebp
; SSE-NEXT: pushl %esi
; SSE-NEXT: andl $-16, %esp
; SSE-NEXT: subl $32, %esp
; SSE-NEXT: movl %esp, %esi
; SSE-NEXT: movl 8(%ebp), %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, (%esi)
; SSE-NEXT: addl $3, %eax
; SSE-NEXT: andl $-4, %eax
; SSE-NEXT: calll __alloca
; SSE-NEXT: movl %esp, %eax
; SSE-NEXT: pushl %eax
; SSE-NEXT: calll _dummy
; SSE-NEXT: leal -4(%ebp), %esp
; SSE-NEXT: popl %esi
; SSE-NEXT: popl %ebp
; SSE-NEXT: retl
;
; AVX-LABEL: test2:
; AVX: # %bb.0:
; AVX-NEXT: pushl %ebp
; AVX-NEXT: movl %esp, %ebp
; AVX-NEXT: pushl %esi
; AVX-NEXT: andl $-16, %esp
; AVX-NEXT: subl $32, %esp
; AVX-NEXT: movl %esp, %esi
; AVX-NEXT: movl 8(%ebp), %eax
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %xmm0, (%esi)
; AVX-NEXT: addl $3, %eax
; AVX-NEXT: andl $-4, %eax
; AVX-NEXT: calll __alloca
; AVX-NEXT: movl %esp, %eax
; AVX-NEXT: pushl %eax
; AVX-NEXT: calll _dummy
; AVX-NEXT: leal -4(%ebp), %esp
; AVX-NEXT: popl %esi
; AVX-NEXT: popl %ebp
; AVX-NEXT: retl
; 16-byte buffer declared with only 4-byte alignment; the memset below zeroes
; all 16 bytes of it.
%tmp1210 = alloca i8, i32 16, align 4
call void @llvm.memset.p0.i64(ptr align 4 %tmp1210, i8 0, i64 16, i1 false)
; Variable-sized alloca: appears as the __alloca call in every expected output.
%x = alloca i8, i32 %t
call void @dummy(ptr %x)
ret void
}
; External function with no body in this file; the tests call it with a
; pointer argument.
declare void @dummy(ptr)
; Memset intrinsic taking (destination pointer, fill byte, i64 length,
; i1 volatile flag).
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind