[win][x64] Guarantee shape of tail call to a control flow guard function (#174108)
For the purposes of Import Call Optimization, a tail call to a Control Flow Guard function must always be lowered as `TAILJMPm64_REX`. This is useful in general, as we know that the call will be handled via a fixup rather than needing scratch registers for base+index. This change introduces a new pseudo-instruction, `TCRETURN_CFG`, specifically for the purposes of noting that there is a "tail-call return" control flow guard function call, which is then always lowered to `TAILJMPm64_REX` instead of relying on pattern matching.
This commit is contained in:
parent
f8284d500d
commit
7142687ab0
@ -12,9 +12,11 @@
|
||||
#define LLVM_TRANSFORMS_CFGUARD_H
|
||||
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class CallBase;
|
||||
class FunctionPass;
|
||||
class GlobalValue;
|
||||
|
||||
@ -23,19 +25,20 @@ public:
|
||||
enum class Mechanism { Check, Dispatch };
|
||||
|
||||
CFGuardPass(Mechanism M = Mechanism::Check) : GuardMechanism(M) {}
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
|
||||
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
|
||||
|
||||
private:
|
||||
Mechanism GuardMechanism;
|
||||
};
|
||||
|
||||
/// Insert Control Flow Guard checks on indirect function calls.
|
||||
FunctionPass *createCFGuardCheckPass();
|
||||
LLVM_ABI FunctionPass *createCFGuardCheckPass();
|
||||
|
||||
/// Insert Control Flow Guard dispatches on indirect function calls.
|
||||
FunctionPass *createCFGuardDispatchPass();
|
||||
LLVM_ABI FunctionPass *createCFGuardDispatchPass();
|
||||
|
||||
bool isCFGuardFunction(const GlobalValue *GV);
|
||||
LLVM_ABI bool isCFGuardCall(const CallBase *CB);
|
||||
LLVM_ABI bool isCFGuardFunction(const GlobalValue *GV);
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
@ -35845,6 +35845,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(CVTTP2UIS)
|
||||
NODE_NAME_CASE(MCVTTP2UIS)
|
||||
NODE_NAME_CASE(POP_FROM_X87_REG)
|
||||
NODE_NAME_CASE(TC_RETURN_GLOBALADDR)
|
||||
NODE_NAME_CASE(CALL_GLOBALADDR)
|
||||
}
|
||||
return nullptr;
|
||||
#undef NODE_NAME_CASE
|
||||
|
||||
@ -81,6 +81,10 @@ namespace llvm {
|
||||
// marker instruction.
|
||||
CALL_RVMARKER,
|
||||
|
||||
// Pseudo for a call to a global address that must be called via a memory
|
||||
// address (i.e., not loaded into a register then called).
|
||||
CALL_GLOBALADDR,
|
||||
|
||||
/// The same as ISD::CopyFromReg except that this node makes it explicit
|
||||
/// that it may lower to an x87 FPU stack pop. Optimizations should be more
|
||||
/// cautious when handling this node than a normal CopyFromReg to avoid
|
||||
@ -336,6 +340,10 @@ namespace llvm {
|
||||
/// the list of operands.
|
||||
TC_RETURN,
|
||||
|
||||
// Pseudo for a tail call return to a global address that must be called via
|
||||
// a memory address (i.e., not loaded into a register then called).
|
||||
TC_RETURN_GLOBALADDR,
|
||||
|
||||
// Vector move to low scalar and zero higher vector elements.
|
||||
VZEXT_MOVL,
|
||||
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Transforms/CFGuard.h"
|
||||
|
||||
#define DEBUG_TYPE "x86-isel"
|
||||
|
||||
@ -2551,6 +2552,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
}
|
||||
|
||||
bool IsImpCall = false;
|
||||
bool IsCFGuardCall = false;
|
||||
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
|
||||
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
|
||||
// In the 64-bit large code model, we have to make all calls
|
||||
@ -2568,6 +2570,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
Callee.getValueType() == MVT::i32) {
|
||||
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
|
||||
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
|
||||
} else if (Is64Bit && CB && isCFGuardCall(CB)) {
|
||||
// We'll use a specific pseudo instruction for tail calls to control flow
|
||||
// guard functions to guarantee the instruction used for the call. To do
|
||||
// this we need to unwrap the load now and use the CFG Func GV as the
|
||||
// callee.
|
||||
IsCFGuardCall = true;
|
||||
auto *LoadNode = cast<LoadSDNode>(Callee);
|
||||
GlobalAddressSDNode *GA =
|
||||
cast<GlobalAddressSDNode>(unwrapAddress(LoadNode->getBasePtr()));
|
||||
assert(isCFGuardFunction(GA->getGlobal()) &&
|
||||
"CFG Call should be to a guard function");
|
||||
assert(LoadNode->getOffset()->isUndef() &&
|
||||
"CFG Function load should not have an offset");
|
||||
Callee = DAG.getTargetGlobalAddress(
|
||||
GA->getGlobal(), dl, GA->getValueType(0), 0, X86II::MO_NO_FLAG);
|
||||
}
|
||||
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
@ -2672,7 +2689,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
// should be computed from returns not tail calls. Consider a void
|
||||
// function making a tail call to a function returning int.
|
||||
MF.getFrameInfo().setHasTailCall();
|
||||
SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
|
||||
auto Opcode =
|
||||
IsCFGuardCall ? X86ISD::TC_RETURN_GLOBALADDR : X86ISD::TC_RETURN;
|
||||
SDValue Ret = DAG.getNode(Opcode, dl, MVT::Other, Ops);
|
||||
|
||||
if (IsCFICall)
|
||||
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
|
||||
@ -2688,6 +2707,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
|
||||
} else if (IsNoTrackIndirectCall) {
|
||||
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
|
||||
} else if (IsCFGuardCall) {
|
||||
Chain = DAG.getNode(X86ISD::CALL_GLOBALADDR, dl, NodeTys, Ops);
|
||||
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
|
||||
// Calls with a "clang.arc.attachedcall" bundle are special. They should be
|
||||
// expanded to the call, directly followed by a special marker sequence and
|
||||
|
||||
@ -1379,6 +1379,14 @@ def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
|
||||
(TCRETURN_WINmi64 addr:$dst, timm:$off)>,
|
||||
Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>;
|
||||
|
||||
def : Pat<(X86tcret_globaladdr tglobaladdr:$dst, timm:$off),
|
||||
(TCRETURNmi64 RIP, 1, zero_reg, tglobaladdr:$dst, zero_reg, timm:$off)>,
|
||||
Requires<[In64BitMode, IsNotWin64CCFunc]>;
|
||||
|
||||
def : Pat<(X86tcret_globaladdr tglobaladdr:$dst, timm:$off),
|
||||
(TCRETURN_WINmi64 RIP, 1, zero_reg, tglobaladdr:$dst, zero_reg, timm:$off)>,
|
||||
Requires<[IsWin64CCFunc]>;
|
||||
|
||||
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
|
||||
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
|
||||
Requires<[In64BitMode, UseIndirectThunkCalls]>;
|
||||
@ -1403,6 +1411,9 @@ def : Pat<(X86call (i32 texternalsym:$dst)),
|
||||
def : Pat<(X86call (i32 imm:$dst)),
|
||||
(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
|
||||
|
||||
def : Pat<(X86call_globaladdr tglobaladdr:$dst),
|
||||
(CALL64m RIP, 1, zero_reg, tglobaladdr:$dst, zero_reg)>;
|
||||
|
||||
// Comparisons.
|
||||
|
||||
// TEST R,R is smaller than CMP R,0
|
||||
|
||||
@ -372,6 +372,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
|
||||
def TCRETURNmi64 : PseudoI<(outs),
|
||||
(ins i64mem_TC:$dst, i32imm:$offset),
|
||||
[]>, Sched<[WriteJumpLd]>;
|
||||
let mayLoad = 1 in
|
||||
def TCRETURN_WINmi64 : PseudoI<(outs),
|
||||
(ins i64mem_w64TC:$dst, i32imm:$offset),
|
||||
[]>, Sched<[WriteJumpLd]>;
|
||||
|
||||
@ -210,6 +210,10 @@ def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
|
||||
SDNPVariadic]>;
|
||||
|
||||
def X86call_globaladdr : SDNode<"X86ISD::CALL_GLOBALADDR", SDT_X86Call,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
|
||||
SDNPVariadic]>;
|
||||
|
||||
def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
|
||||
SDNPVariadic]>;
|
||||
@ -259,6 +263,9 @@ def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH",
|
||||
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
|
||||
|
||||
def X86tcret_globaladdr : SDNode<"X86ISD::TC_RETURN_GLOBALADDR", SDT_X86TCRET,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
|
||||
|
||||
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
|
||||
[SDNPCommutative]>;
|
||||
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
|
||||
|
||||
@ -313,6 +313,11 @@ FunctionPass *llvm::createCFGuardDispatchPass() {
|
||||
return new CFGuard(CFGuardPass::Mechanism::Dispatch);
|
||||
}
|
||||
|
||||
bool llvm::isCFGuardCall(const CallBase *CB) {
|
||||
return CB->getCallingConv() == CallingConv::CFGuard_Check ||
|
||||
CB->countOperandBundlesOfType(LLVMContext::OB_cfguardtarget);
|
||||
}
|
||||
|
||||
bool llvm::isCFGuardFunction(const GlobalValue *GV) {
|
||||
if (GV->getLinkage() != GlobalValue::ExternalLinkage)
|
||||
return false;
|
||||
|
||||
@ -56,8 +56,7 @@ entry:
|
||||
; On x86_64, __guard_dispatch_icall_fptr tail calls the function, so there should be only one call instruction.
|
||||
; X64-LABEL: func_optnone_cf
|
||||
; X64: leaq target_func(%rip), %rax
|
||||
; X64: movq __guard_dispatch_icall_fptr(%rip), %rcx
|
||||
; X64: callq *%rcx
|
||||
; X64: callq *__guard_dispatch_icall_fptr(%rip)
|
||||
; X64-NOT: callq
|
||||
}
|
||||
attributes #1 = { noinline optnone }
|
||||
@ -125,6 +124,38 @@ lpad: ; preds = %entry
|
||||
|
||||
declare void @h()
|
||||
|
||||
; Regression test: even if the invoke has many arguments, we should be calling
|
||||
; via a rip wrapper, rather than loading the CFG func into a register.
|
||||
define i32 @invoke_many_args(ptr %func_ptr, ptr %pass_thru1, ptr %pass_thru2) personality ptr @h {
|
||||
%stack_arg1 = alloca ptr, align 8
|
||||
%stack_arg2 = alloca ptr, align 8
|
||||
%stack_arg3 = alloca ptr, align 8
|
||||
invoke void %func_ptr(ptr %pass_thru1, ptr %pass_thru2, ptr %stack_arg1, ptr %stack_arg2, ptr %stack_arg3)
|
||||
to label %invoke.cont unwind label %lpad
|
||||
|
||||
invoke.cont:
|
||||
ret i32 2
|
||||
|
||||
lpad:
|
||||
%tmp = landingpad { ptr, i32 }
|
||||
catch ptr null
|
||||
ret i32 -1
|
||||
|
||||
; On i686, the call to __guard_check_icall_fptr should come immediately before the call to the target function.
|
||||
; X86-LABEL: invoke_many_args
|
||||
; X86: calll *___guard_check_icall_fptr
|
||||
; X86_MINGW-NEXT: Ltmp3:
|
||||
; X86: calll *%ecx
|
||||
; X86: # %invoke.cont
|
||||
; X86: # %lpad
|
||||
|
||||
; On x86_64, __guard_dispatch_icall_fptr tail calls the function, so there should be only one call instruction.
|
||||
; X64-LABEL: invoke_many_args
|
||||
; X64: callq *__guard_dispatch_icall_fptr(%rip)
|
||||
; X64-NOT: callq
|
||||
; X64: # %invoke.cont
|
||||
; X64: # %lpad
|
||||
}
|
||||
|
||||
; Test that Control Flow Guard preserves floating point arguments.
|
||||
declare double @target_func_doubles(double, double, double, double)
|
||||
@ -152,10 +183,10 @@ entry:
|
||||
; X64_MSVC: movsd __real@4000000000000000(%rip), %xmm1
|
||||
; X64_MSVC: movsd __real@4008000000000000(%rip), %xmm2
|
||||
; X64_MSVC: movsd __real@4010000000000000(%rip), %xmm3
|
||||
; X64_MINGW: movsd .LCPI4_0(%rip), %xmm0
|
||||
; X64_MINGW: movsd .LCPI4_1(%rip), %xmm1
|
||||
; X64_MINGW: movsd .LCPI4_2(%rip), %xmm2
|
||||
; X64_MINGW: movsd .LCPI4_3(%rip), %xmm3
|
||||
; X64_MINGW: movsd .LCPI5_0(%rip), %xmm0
|
||||
; X64_MINGW: movsd .LCPI5_1(%rip), %xmm1
|
||||
; X64_MINGW: movsd .LCPI5_2(%rip), %xmm2
|
||||
; X64_MINGW: movsd .LCPI5_3(%rip), %xmm3
|
||||
; X64: callq *__guard_dispatch_icall_fptr(%rip)
|
||||
; X64-NOT: callq
|
||||
|
||||
@ -213,8 +244,7 @@ entry:
|
||||
; X64-LABEL: vmptr_thunk:
|
||||
; X64: movq (%rcx), %rax
|
||||
; X64-NEXT: movq 8(%rax), %rax
|
||||
; X64-NEXT: movq __guard_dispatch_icall_fptr(%rip), %rdx
|
||||
; X64-NEXT: rex64 jmpq *%rdx # TAILCALL
|
||||
; X64-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip) # TAILCALL
|
||||
; X64-NOT: callq
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user