Omair Javaid e1e1836bbd
[CodeGen] Inline stack guard check on Windows (#136290)
This patch optimizes the Windows security cookie check mechanism by
moving the comparison inline and only calling __security_check_cookie
when the check fails. This reduces the overhead of making a DLL call 
for every function return.

Previously, we implemented this optimization through a machine pass
(X86WinFixupBufferSecurityCheckPass) in PR #95904 submitted by
@mahesh-attarde. We have reverted that pass in favor of this new
approach. We have also abandoned the AArch64-specific implementation
of the same pass in PR #121938 in favor of this more general solution.

The old machine instruction pass approach:
- Scanned the generated code to find __security_check_cookie calls
- Modified these calls by splitting basic blocks
- Added comparison logic and conditional branching
- Required complex block management and live register computation

The new approach:
- Implements the same optimization during instruction selection
- Directly emits the comparison and conditional branching
- No need for post-processing or basic block manipulation
- Disables optimization at -Oz.

Thanks @tamaspetz, @efriedma-quic and @arsenm for their help.
2025-06-12 19:38:42 +05:00

164 lines
5.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=x86_64-windows-msvc %s -o - -verify-machineinstrs | FileCheck %s -check-prefix=WINDOWS
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s -check-prefix=LINUX
; External function used as the tail-call target of @tailcall_frame.
declare void @h(ptr, i64, ptr)
; tailcall_frame: a tailcc function carrying the sspreq attribute (stack
; protector always required, per the LLVM LangRef) whose body is a single
; tailcc tail call to @h with null/zero arguments.
;
; Expected x86_64-windows-msvc output: the prologue stores
; __security_cookie XOR %rsp into a stack slot. Before leaving the function
; the slot is reloaded, XORed with %rsp again and compared inline against
; __security_cookie. On a match the argument registers are zeroed and the
; function tail-jumps to h; only on a mismatch does it branch to a block that
; calls __security_check_cookie, followed by int3.
;
; Expected x86_64-linux-gnu output: the %fs:40 value is saved to a stack slot
; and compared again before the exit; a mismatch branches to a call to
; __stack_chk_fail, otherwise the function tail-jumps to h@PLT.
define tailcc void @tailcall_frame(ptr %0, i64 %1) sspreq {
; WINDOWS-LABEL: tailcall_frame:
; WINDOWS: # %bb.0:
; WINDOWS-NEXT: subq $56, %rsp
; WINDOWS-NEXT: .seh_stackalloc 56
; WINDOWS-NEXT: .seh_endprologue
; WINDOWS-NEXT: movq __security_cookie(%rip), %rax
; WINDOWS-NEXT: xorq %rsp, %rax
; WINDOWS-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rax
; WINDOWS-NEXT: xorq %rsp, %rax
; WINDOWS-NEXT: movq __security_cookie(%rip), %rcx
; WINDOWS-NEXT: cmpq %rax, %rcx
; WINDOWS-NEXT: jne .LBB0_1
; WINDOWS-NEXT: # %bb.2:
; WINDOWS-NEXT: xorl %ecx, %ecx
; WINDOWS-NEXT: xorl %edx, %edx
; WINDOWS-NEXT: xorl %r8d, %r8d
; WINDOWS-NEXT: .seh_startepilogue
; WINDOWS-NEXT: addq $56, %rsp
; WINDOWS-NEXT: .seh_endepilogue
; WINDOWS-NEXT: jmp h # TAILCALL
; WINDOWS-NEXT: .LBB0_1:
; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; WINDOWS-NEXT: xorq %rsp, %rcx
; WINDOWS-NEXT: callq __security_check_cookie
; WINDOWS-NEXT: int3
; WINDOWS-NEXT: .seh_endproc
;
; LINUX-LABEL: tailcall_frame:
; LINUX: # %bb.0:
; LINUX-NEXT: subq $24, %rsp
; LINUX-NEXT: .cfi_def_cfa_offset 32
; LINUX-NEXT: movq %fs:40, %rax
; LINUX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %fs:40, %rax
; LINUX-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
; LINUX-NEXT: jne .LBB0_2
; LINUX-NEXT: # %bb.1: # %SP_return
; LINUX-NEXT: xorl %edi, %edi
; LINUX-NEXT: xorl %esi, %esi
; LINUX-NEXT: xorl %edx, %edx
; LINUX-NEXT: addq $24, %rsp
; LINUX-NEXT: .cfi_def_cfa_offset 8
; LINUX-NEXT: jmp h@PLT # TAILCALL
; LINUX-NEXT: .LBB0_2: # %CallStackCheckFailBlk
; LINUX-NEXT: .cfi_def_cfa_offset 32
; LINUX-NEXT: callq __stack_chk_fail@PLT
; IR under test: the tail call is expected to remain a jump (see the TAILCALL
; lines above) even though the guard check is placed in front of it.
tail call tailcc void @h(ptr null, i64 0, ptr null)
ret void
}
; External function called twice by @tailcall_unrelated_frame.
declare void @bar()
; tailcall_unrelated_frame: an sspreq function with the default calling
; convention that makes an ordinary call to @bar and then a tail call to @bar.
;
; Expected output on both targets: the first call is emitted as callq, the
; stack guard comparison follows it, and the second call is emitted as a
; tail jump on the success path. On x86_64-windows-msvc the comparison is done
; inline against __security_cookie and __security_check_cookie (followed by
; int3) is reached only through the mismatch branch. On x86_64-linux-gnu the
; mismatch branch calls __stack_chk_fail.
define void @tailcall_unrelated_frame() sspreq {
; WINDOWS-LABEL: tailcall_unrelated_frame:
; WINDOWS: # %bb.0:
; WINDOWS-NEXT: subq $40, %rsp
; WINDOWS-NEXT: .seh_stackalloc 40
; WINDOWS-NEXT: .seh_endprologue
; WINDOWS-NEXT: movq __security_cookie(%rip), %rax
; WINDOWS-NEXT: xorq %rsp, %rax
; WINDOWS-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: callq bar
; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rax
; WINDOWS-NEXT: xorq %rsp, %rax
; WINDOWS-NEXT: movq __security_cookie(%rip), %rcx
; WINDOWS-NEXT: cmpq %rax, %rcx
; WINDOWS-NEXT: jne .LBB1_1
; WINDOWS-NEXT: # %bb.2:
; WINDOWS-NEXT: .seh_startepilogue
; WINDOWS-NEXT: addq $40, %rsp
; WINDOWS-NEXT: .seh_endepilogue
; WINDOWS-NEXT: jmp bar # TAILCALL
; WINDOWS-NEXT: .LBB1_1:
; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; WINDOWS-NEXT: xorq %rsp, %rcx
; WINDOWS-NEXT: callq __security_check_cookie
; WINDOWS-NEXT: int3
; WINDOWS-NEXT: .seh_endproc
;
; LINUX-LABEL: tailcall_unrelated_frame:
; LINUX: # %bb.0:
; LINUX-NEXT: pushq %rax
; LINUX-NEXT: .cfi_def_cfa_offset 16
; LINUX-NEXT: movq %fs:40, %rax
; LINUX-NEXT: movq %rax, (%rsp)
; LINUX-NEXT: callq bar@PLT
; LINUX-NEXT: movq %fs:40, %rax
; LINUX-NEXT: cmpq (%rsp), %rax
; LINUX-NEXT: jne .LBB1_2
; LINUX-NEXT: # %bb.1: # %SP_return
; LINUX-NEXT: popq %rax
; LINUX-NEXT: .cfi_def_cfa_offset 8
; LINUX-NEXT: jmp bar@PLT # TAILCALL
; LINUX-NEXT: .LBB1_2: # %CallStackCheckFailBlk
; LINUX-NEXT: .cfi_def_cfa_offset 16
; LINUX-NEXT: callq __stack_chk_fail@PLT
; IR under test: a regular call followed by a tail call to the same callee.
call void @bar()
tail call void @bar()
ret void
}
; External function called twice by @caller.
declare void @callee()
; caller: an sspreq function in which the call marked `tail` comes first and
; is followed by a second, unmarked call, so the marked call is not the last
; call before the return.
;
; Expected output on both targets: both calls are emitted as ordinary callq
; instructions, the stack guard comparison follows them, and the success path
; ends in retq rather than a tail jump. On x86_64-windows-msvc the comparison
; is done inline against __security_cookie and __security_check_cookie
; (followed by int3) is reached only through the mismatch branch. On
; x86_64-linux-gnu the mismatch branch calls __stack_chk_fail.
define void @caller() sspreq {
; WINDOWS-LABEL: caller:
; WINDOWS: # %bb.0:
; WINDOWS-NEXT: subq $40, %rsp
; WINDOWS-NEXT: .seh_stackalloc 40
; WINDOWS-NEXT: .seh_endprologue
; WINDOWS-NEXT: movq __security_cookie(%rip), %rax
; WINDOWS-NEXT: xorq %rsp, %rax
; WINDOWS-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: callq callee
; WINDOWS-NEXT: callq callee
; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rax
; WINDOWS-NEXT: xorq %rsp, %rax
; WINDOWS-NEXT: movq __security_cookie(%rip), %rcx
; WINDOWS-NEXT: cmpq %rax, %rcx
; WINDOWS-NEXT: jne .LBB2_2
; WINDOWS-NEXT: # %bb.1:
; WINDOWS-NEXT: .seh_startepilogue
; WINDOWS-NEXT: addq $40, %rsp
; WINDOWS-NEXT: .seh_endepilogue
; WINDOWS-NEXT: retq
; WINDOWS-NEXT: .LBB2_2:
; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; WINDOWS-NEXT: xorq %rsp, %rcx
; WINDOWS-NEXT: callq __security_check_cookie
; WINDOWS-NEXT: int3
; WINDOWS-NEXT: .seh_endproc
;
; LINUX-LABEL: caller:
; LINUX: # %bb.0:
; LINUX-NEXT: pushq %rax
; LINUX-NEXT: .cfi_def_cfa_offset 16
; LINUX-NEXT: movq %fs:40, %rax
; LINUX-NEXT: movq %rax, (%rsp)
; LINUX-NEXT: callq callee@PLT
; LINUX-NEXT: callq callee@PLT
; LINUX-NEXT: movq %fs:40, %rax
; LINUX-NEXT: cmpq (%rsp), %rax
; LINUX-NEXT: jne .LBB2_2
; LINUX-NEXT: # %bb.1: # %SP_return
; LINUX-NEXT: popq %rax
; LINUX-NEXT: .cfi_def_cfa_offset 8
; LINUX-NEXT: retq
; LINUX-NEXT: .LBB2_2: # %CallStackCheckFailBlk
; LINUX-NEXT: .cfi_def_cfa_offset 16
; LINUX-NEXT: callq __stack_chk_fail@PLT
; IR under test: the `tail`-marked call is followed by another call, so
; neither is in tail position relative to the return.
tail call void @callee()
call void @callee()
ret void
}