llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
Peilin Ye 17bfc00f7c
[BPF] Add load-acquire and store-release instructions under -mcpu=v4 (#108636)
As discussed in [1], introduce BPF instructions with load-acquire and
store-release semantics under -mcpu=v4.  Define 2 new flags:

  BPF_LOAD_ACQ    0x100
  BPF_STORE_REL   0x110

A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_LOAD_ACQ (0x100).

Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
the 'imm' field set to BPF_STORE_REL (0x110).

Unlike existing atomic read-modify-write operations that only support
BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and
store-releases also support BPF_B (8-bit) and BPF_H (16-bit).  An 8- or
16-bit load-acquire zero-extends the value before writing it to a 32-bit
register, just like ARM64 instruction LDAPRH and friends.

As an example (assuming little-endian):

  long foo(long *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }

foo() can be compiled to:

  db 10 00 00 00 01 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
  95 00 00 00 00 00 00 00  exit

  opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
  imm (0x00000100): BPF_LOAD_ACQ

Similarly:

  void bar(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }

bar() can be compiled to:

  cb 21 00 00 10 01 00 00  store_release((u16 *)(r1 + 0x0), w2)
  95 00 00 00 00 00 00 00  exit

  opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
  imm (0x00000110): BPF_STORE_REL

Inline assembly is also supported.

Add a pre-defined macro, __BPF_FEATURE_LOAD_ACQ_STORE_REL, to let
developers detect this new feature.  It can also be disabled using a new
llc option, -disable-load-acq-store-rel.

Using __ATOMIC_RELAXED for __atomic_store{,_n}() will generate a "plain"
store (BPF_MEM | BPF_STX) instruction:

  void foo(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELAXED);
  }

  6b 21 00 00 00 00 00 00  *(u16 *)(r1 + 0x0) = w2
  95 00 00 00 00 00 00 00  exit

Similarly, using __ATOMIC_RELAXED for __atomic_load{,_n}() will generate
a zero-extending, "plain" load (BPF_MEM | BPF_LDX) instruction:

  int foo(char *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_RELAXED);
  }

  71 11 00 00 00 00 00 00  w1 = *(u8 *)(r1 + 0x0)
  bc 10 08 00 00 00 00 00  w0 = (s8)w1
  95 00 00 00 00 00 00 00  exit

Currently __ATOMIC_CONSUME is an alias for __ATOMIC_ACQUIRE.  Using
__ATOMIC_SEQ_CST ("sequentially consistent") is not supported yet and
will cause an error:

  $ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store is
not supported
1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
      |     ^
  ...

Finally, rename those isST*() and isLD*() helper functions in
BPFMISimplifyPatchable.cpp based on what the instructions actually do,
rather than their instruction class.

[1]
https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
2025-03-04 09:19:39 -08:00

134 lines
4.2 KiB
C++

//===-- BPFSubtarget.cpp - BPF Subtarget Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BPF specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "BPFSubtarget.h"
#include "BPF.h"
#include "BPFTargetMachine.h"
#include "GISel/BPFCallLowering.h"
#include "GISel/BPFLegalizerInfo.h"
#include "GISel/BPFRegisterBankInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/TargetParser/Host.h"
using namespace llvm;
#define DEBUG_TYPE "bpf-subtarget"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "BPFGenSubtargetInfo.inc"
static cl::opt<bool> Disable_ldsx("disable-ldsx", cl::Hidden, cl::init(false),
cl::desc("Disable ldsx insns"));
static cl::opt<bool> Disable_movsx("disable-movsx", cl::Hidden, cl::init(false),
cl::desc("Disable movsx insns"));
static cl::opt<bool> Disable_bswap("disable-bswap", cl::Hidden, cl::init(false),
cl::desc("Disable bswap insns"));
static cl::opt<bool> Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden,
cl::init(false), cl::desc("Disable sdiv/smod insns"));
static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
cl::desc("Disable gotol insn"));
static cl::opt<bool>
Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
cl::desc("Disable BPF_ST (immediate store) insn"));
static cl::opt<bool> Disable_load_acq_store_rel(
"disable-load-acq-store-rel", cl::Hidden, cl::init(false),
cl::desc("Disable load-acquire and store-release insns"));
void BPFSubtarget::anchor() {}
BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
initSubtargetFeatures(CPU, FS);
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
return *this;
}
void BPFSubtarget::initializeEnvironment() {
HasJmpExt = false;
HasJmp32 = false;
HasAlu32 = false;
UseDwarfRIS = false;
HasLdsx = false;
HasMovsx = false;
HasBswap = false;
HasSdivSmod = false;
HasGotol = false;
HasStoreImm = false;
HasLoadAcqStoreRel = false;
}
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPU.empty())
CPU = "v3";
if (CPU == "probe")
CPU = sys::detail::getHostCPUNameForBPF();
if (CPU == "generic" || CPU == "v1")
return;
if (CPU == "v2") {
HasJmpExt = true;
return;
}
if (CPU == "v3") {
HasJmpExt = true;
HasJmp32 = true;
HasAlu32 = true;
return;
}
if (CPU == "v4") {
HasJmpExt = true;
HasJmp32 = true;
HasAlu32 = true;
HasLdsx = !Disable_ldsx;
HasMovsx = !Disable_movsx;
HasBswap = !Disable_bswap;
HasSdivSmod = !Disable_sdiv_smod;
HasGotol = !Disable_gotol;
HasStoreImm = !Disable_StoreImm;
HasLoadAcqStoreRel = !Disable_load_acq_store_rel;
return;
}
}
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
FrameLowering(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this) {
IsLittleEndian = TT.isLittleEndian();
CallLoweringInfo.reset(new BPFCallLowering(*getTargetLowering()));
Legalizer.reset(new BPFLegalizerInfo(*this));
auto *RBI = new BPFRegisterBankInfo(*getRegisterInfo());
RegBankInfo.reset(RBI);
InstSelector.reset(createBPFInstructionSelector(
*static_cast<const BPFTargetMachine *>(&TM), *this, *RBI));
}
const CallLowering *BPFSubtarget::getCallLowering() const {
return CallLoweringInfo.get();
}
InstructionSelector *BPFSubtarget::getInstructionSelector() const {
return InstSelector.get();
}
const LegalizerInfo *BPFSubtarget::getLegalizerInfo() const {
return Legalizer.get();
}
const RegisterBankInfo *BPFSubtarget::getRegBankInfo() const {
return RegBankInfo.get();
}