
As discussed in [1], introduce BPF instructions with load-acquire and store-release semantics under -mcpu=v4. Define 2 new flags: BPF_LOAD_ACQ 0x100 BPF_STORE_REL 0x110 A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_LOAD_ACQ (0x100). Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_STORE_REL (0x110). Unlike existing atomic read-modify-write operations that only support BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and store-releases also support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire zero-extends the value before writing it to a 32-bit register, just like ARM64 instruction LDAPRH and friends. As an example (assuming little-endian): long foo(long *ptr) { return __atomic_load_n(ptr, __ATOMIC_ACQUIRE); } foo() can be compiled to: db 10 00 00 00 01 00 00 r0 = load_acquire((u64 *)(r1 + 0x0)) 95 00 00 00 00 00 00 00 exit opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX imm (0x00000100): BPF_LOAD_ACQ Similarly: void bar(short *ptr, short val) { __atomic_store_n(ptr, val, __ATOMIC_RELEASE); } bar() can be compiled to: cb 21 00 00 10 01 00 00 store_release((u16 *)(r1 + 0x0), w2) 95 00 00 00 00 00 00 00 exit opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX imm (0x00000110): BPF_STORE_REL Inline assembly is also supported. Add a pre-defined macro, __BPF_FEATURE_LOAD_ACQ_STORE_REL, to let developers detect this new feature. It can also be disabled using a new llc option, -disable-load-acq-store-rel. 
Using __ATOMIC_RELAXED for __atomic_store{,_n}() will generate a "plain" store (BPF_MEM | BPF_STX) instruction: void foo(short *ptr, short val) { __atomic_store_n(ptr, val, __ATOMIC_RELAXED); } 6b 21 00 00 00 00 00 00 *(u16 *)(r1 + 0x0) = w2 95 00 00 00 00 00 00 00 exit Similarly, using __ATOMIC_RELAXED for __atomic_load{,_n}() will generate a zero-extending, "plain" load (BPF_MEM | BPF_LDX) instruction: int foo(char *ptr) { return __atomic_load_n(ptr, __ATOMIC_RELAXED); } 71 11 00 00 00 00 00 00 w1 = *(u8 *)(r1 + 0x0) bc 10 08 00 00 00 00 00 w0 = (s8)w1 95 00 00 00 00 00 00 00 exit Currently __ATOMIC_CONSUME is an alias for __ATOMIC_ACQUIRE. Using __ATOMIC_SEQ_CST ("sequentially consistent") is not supported yet and will cause an error: $ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store is not supported 1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); } | ^ ... Finally, rename those isST*() and isLD*() helper functions in BPFMISimplifyPatchable.cpp based on what the instructions actually do, rather than their instruction class. [1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
134 lines
4.2 KiB
C++
//===-- BPFSubtarget.cpp - BPF Subtarget Information ----------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the BPF specific subclass of TargetSubtargetInfo.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BPFSubtarget.h"
|
|
#include "BPF.h"
|
|
#include "BPFTargetMachine.h"
|
|
#include "GISel/BPFCallLowering.h"
|
|
#include "GISel/BPFLegalizerInfo.h"
|
|
#include "GISel/BPFRegisterBankInfo.h"
|
|
#include "llvm/MC/TargetRegistry.h"
|
|
#include "llvm/TargetParser/Host.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "bpf-subtarget"
|
|
|
|
#define GET_SUBTARGETINFO_TARGET_DESC
|
|
#define GET_SUBTARGETINFO_CTOR
|
|
#include "BPFGenSubtargetInfo.inc"
|
|
|
|
// Hidden llc command-line flags that let developers selectively disable
// individual instruction groups that initSubtargetFeatures() would otherwise
// enable for -mcpu=v4.  All default to false (i.e. the feature is enabled).
static cl::opt<bool> Disable_ldsx("disable-ldsx", cl::Hidden, cl::init(false),
                                  cl::desc("Disable ldsx insns"));
static cl::opt<bool> Disable_movsx("disable-movsx", cl::Hidden, cl::init(false),
                                   cl::desc("Disable movsx insns"));
static cl::opt<bool> Disable_bswap("disable-bswap", cl::Hidden, cl::init(false),
                                   cl::desc("Disable bswap insns"));
static cl::opt<bool> Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden,
                                       cl::init(false),
                                       cl::desc("Disable sdiv/smod insns"));
static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
                                   cl::desc("Disable gotol insn"));
static cl::opt<bool>
    Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
                     cl::desc("Disable BPF_ST (immediate store) insn"));
// New with load-acquire/store-release support: allows turning the new
// BPF_LOAD_ACQ / BPF_STORE_REL instructions off even under -mcpu=v4.
static cl::opt<bool> Disable_load_acq_store_rel(
    "disable-load-acq-store-rel", cl::Hidden, cl::init(false),
    cl::desc("Disable load-acquire and store-release insns"));
|
|
|
|
// Out-of-line virtual method anchor — pins the class's vtable to this
// translation unit (standard LLVM idiom; no runtime behavior).
void BPFSubtarget::anchor() {}
|
|
|
|
// Reset all feature flags and re-derive them from the CPU name and the
// feature string FS.  Invoked from the constructor's member-init list (while
// initializing FrameLowering) so later members see a configured subtarget;
// returns *this to make that usage possible.
BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                            StringRef FS) {
  // Order matters: clear every Has* flag first, then set the flags implied
  // by the CPU version, then apply the explicit feature string.
  initializeEnvironment();
  initSubtargetFeatures(CPU, FS);
  // Tablegen-generated parser (see BPFGenSubtargetInfo.inc included above).
  ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
  return *this;
}
|
|
|
|
// Reset every ISA feature flag to its conservative (disabled) default.
// initSubtargetFeatures() subsequently enables the flags implied by the
// selected CPU version; the version each flag belongs to is noted below.
void BPFSubtarget::initializeEnvironment() {
  HasJmpExt = false;          // extended jump insns (v2+)
  HasJmp32 = false;           // 32-bit jump insns (v3+)
  HasAlu32 = false;           // 32-bit ALU insns (v3+)
  UseDwarfRIS = false;        // NOTE(review): not touched by
                              // initSubtargetFeatures; presumably set via the
                              // feature string — confirm in BPF.td.
  HasLdsx = false;            // sign-extending loads (v4)
  HasMovsx = false;           // sign-extending moves (v4)
  HasBswap = false;           // byte-swap insns (v4)
  HasSdivSmod = false;        // signed div/mod insns (v4)
  HasGotol = false;           // long-range goto insn (v4)
  HasStoreImm = false;        // BPF_ST immediate store insn (v4)
  HasLoadAcqStoreRel = false; // load-acquire/store-release insns (v4)
}
|
|
|
|
// Translate the CPU name into the set of available ISA feature flags.
// An empty CPU name defaults to "v3", and "probe" resolves to the running
// host kernel's BPF CPU.  Unrecognized CPU names leave every flag disabled.
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
  if (CPU.empty())
    CPU = "v3";
  if (CPU == "probe")
    CPU = sys::detail::getHostCPUNameForBPF();

  // Map the CPU name onto a numeric ISA version.  Features are cumulative:
  // every vN CPU supports all features of vM for M < N, so below we enable
  // each feature group with a simple >= comparison instead of repeating the
  // earlier versions' flags in each branch.
  unsigned Version = 0;
  if (CPU == "generic" || CPU == "v1")
    Version = 1;
  else if (CPU == "v2")
    Version = 2;
  else if (CPU == "v3")
    Version = 3;
  else if (CPU == "v4")
    Version = 4;

  if (Version >= 2)
    HasJmpExt = true;

  if (Version >= 3) {
    HasJmp32 = true;
    HasAlu32 = true;
  }

  if (Version >= 4) {
    // Each v4 instruction group can be individually suppressed through a
    // hidden llc flag (e.g. -disable-load-acq-store-rel), mainly for testing.
    HasLdsx = !Disable_ldsx;
    HasMovsx = !Disable_movsx;
    HasBswap = !Disable_bswap;
    HasSdivSmod = !Disable_sdiv_smod;
    HasGotol = !Disable_gotol;
    HasStoreImm = !Disable_StoreImm;
    HasLoadAcqStoreRel = !Disable_load_acq_store_rel;
  }
}
|
|
|
|
// Construct the subtarget: derive the feature flags from CPU/FS (via
// initializeSubtargetDependencies, deliberately invoked while initializing
// FrameLowering so TLInfo already sees configured features), record the
// triple's endianness, and wire up the GlobalISel support objects.
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
                           const std::string &FS, const TargetMachine &TM)
    : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      FrameLowering(initializeSubtargetDependencies(CPU, FS)),
      TLInfo(TM, *this) {
  IsLittleEndian = TT.isLittleEndian();

  CallLoweringInfo.reset(new BPFCallLowering(*getTargetLowering()));
  Legalizer.reset(new BPFLegalizerInfo(*this));
  // Keep a raw pointer to the register-bank info for the instruction
  // selector below; ownership remains with RegBankInfo.
  auto *RBI = new BPFRegisterBankInfo(*getRegisterInfo());
  RegBankInfo.reset(RBI);

  InstSelector.reset(createBPFInstructionSelector(
      *static_cast<const BPFTargetMachine *>(&TM), *this, *RBI));
}
|
|
|
|
// GlobalISel hook: non-owning pointer to the call-lowering implementation
// created in the constructor.
const CallLowering *BPFSubtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}
|
|
|
|
// GlobalISel hook: non-owning pointer to the instruction selector created in
// the constructor.
InstructionSelector *BPFSubtarget::getInstructionSelector() const {
  return InstSelector.get();
}
|
|
|
|
// GlobalISel hook: non-owning pointer to the legalizer info created in the
// constructor.
const LegalizerInfo *BPFSubtarget::getLegalizerInfo() const {
  return Legalizer.get();
}
|
|
|
|
// GlobalISel hook: non-owning pointer to the register-bank info created in
// the constructor.
const RegisterBankInfo *BPFSubtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}
|