
AAPCS64 reserves any of X9-X15 for a compiler to choose to use for this purpose, and says not to use X16 or X18 as GCC (and the previous implementation) chose to do. The X18 register may need to be used by the kernel in some circumstances, as specified by the platform ABI, so it is generally an unwise choice. Simply choosing a different register fixes the problem of this being broken on any platform that actually follows the platform ABI (which is all of them except EABI, if I am reading this Linux kernel bug correctly: https://lkml2.uits.iu.edu/hypermail/linux/kernel/2001.2/01502.html). As a side benefit, this also generates slightly better code and avoids needing compiler-rt to be present. I did that by following the XCore implementation instead of PPC (although in hindsight, following the RISCV one might have been slightly more readable). That X18 is wrong to use for this purpose has been known for many years (e.g. https://www.mail-archive.com/gcc@gcc.gnu.org/msg76934.html), and it is also known that fixing this to use one of the correct registers is not an ABI break, since this only appears inside of a translation unit. Some of the other temporary registers (e.g. X9) are already reserved inside LLVM for internal use as a generic temporary register in the prologue before saving registers, while X15 was already used in rare cases as a scratch register in the prologue as well, so I felt that it was the most logical choice here.
44 lines
1.8 KiB
C
44 lines
1.8 KiB
C
//===----- trampoline_setup.c - Implement __trampoline_setup -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "int_lib.h"
|
|
|
|
extern void __clear_cache(void *start, void *end);
|
|
|
|
// The PowerPC compiler generates calls to __trampoline_setup() when creating
// trampoline functions on the stack for use with nested functions.
// This function creates a custom 40-byte trampoline function on the stack
// which loads r11 with a pointer to the outer function's locals
// and then jumps to the target nested function.
|
|
|
|
#if __powerpc__ && !defined(__powerpc64__)
|
|
// Fills the caller-provided buffer with a 10-word (40-byte) trampoline.
//
//   trampOnStack       - buffer that receives the trampoline words.
//   trampSizeAllocated - size in bytes of that buffer; aborts if it is
//                        smaller than the 40 bytes written here.
//   realFunc           - address stored in word 2; the trampoline loads it
//                        into r12/CTR and branches to it.
//   localsPtr          - address stored in word 3; the trampoline loads it
//                        into r11 before branching.
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
                                        int trampSizeAllocated,
                                        const void *realFunc, void *localsPtr) {
  // The trampoline is 10 32-bit words, i.e. 40 bytes; keep the size check and
  // the cache-flush range tied to the same constant.
  enum { kTrampolineWords = 10, kTrampolineBytes = kTrampolineWords * 4 };

  // Should never happen, but if the compiler did not allocate enough space on
  // the stack for the trampoline, abort.
  if (trampSizeAllocated < kTrampolineBytes) {
    compilerrt_abort();
  }

  // Create the trampoline. Words 2 and 3 are data, not instructions: the
  // "bl Lbase" in word 1 skips over them and leaves their address in LR, from
  // where "mflr r11" picks it up.
  trampOnStack[0] = 0x7c0802a6; // mflr r0          (save caller's LR)
  trampOnStack[1] = 0x4800000d; // bl Lbase
  // Convert through uintptr_t so the pointer-to-integer conversion is explicit
  // before narrowing to the 32-bit slot.
  trampOnStack[2] = (uint32_t)(uintptr_t)realFunc;
  trampOnStack[3] = (uint32_t)(uintptr_t)localsPtr;
  trampOnStack[4] = 0x7d6802a6; // Lbase: mflr r11  (r11 = &trampOnStack[2])
  trampOnStack[5] = 0x818b0000; // lwz r12,0(r11)   (r12 = realFunc)
  trampOnStack[6] = 0x7c0803a6; // mtlr r0          (restore caller's LR)
  trampOnStack[7] = 0x7d8903a6; // mtctr r12
  trampOnStack[8] = 0x816b0004; // lwz r11,4(r11)   (r11 = localsPtr)
  trampOnStack[9] = 0x4e800420; // bctr

  // Clear the instruction cache over the words just written.
  __clear_cache(trampOnStack, &trampOnStack[kTrampolineWords]);
}
|
|
#endif // __powerpc__ && !defined(__powerpc64__)
|