llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
yonghong-song c3fb2e1cee
[BPF] Support Jump Table (#149715)
Add jump table (switch statement and computed goto) support for BPF
backend.
A `gotox <reg>` insn is implemented and the `<reg>` holds the target
insn where the gotox will go.

For a switch statement like
```
...
            switch (ctx->x) {
            case 1: ret_user = 18; break;
            case 20: ret_user = 6; break;
            case 16: ret_user = 9; break;
            case 6: ret_user = 16; break;
            case 8: ret_user = 14; break;
            case 30: ret_user = 2; break;
            default: ret_user = 1; break;
            }
...
```
and the final binary
```
    The final binary:
       4:       67 01 00 00 03 00 00 00 r1 <<= 0x3                                                           
       5:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll                                  
                0000000000000028:  R_BPF_64_64  BPF.JT.0.0                                                   
       7:       0f 12 00 00 00 00 00 00 r2 += r1
        ...
    Symbol table:
     4: 0000000000000000   240 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.0
     5: 0000000000000000     4 OBJECT  GLOBAL DEFAULT     6 ret_user
     6: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT   UND bar
     7: 00000000000000f0   256 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.1
    and
  [ 4] .jumptables       PROGBITS        0000000000000000 0001c8 0001f0 00      0   0  1
```
Note that for the above example, `-mllvm -bpf-min-jump-table-entries=5`
should be in compilation flags as the current default
bpf-min-jump-table-entries is 13. For example.
```
clang --target=bpf -mcpu=v4 -O2 -mllvm -bpf-min-jump-table-entries=5 -S -g test.c
```

For computed goto like
```
      int foo(int a, int b) {
        __label__ l1, l2, l3, l4;
        void *jt1[] = {[0]=&&l1, [1]=&&l2};
        void *jt2[] = {[0]=&&l3, [1]=&&l4};
        int ret = 0;
    
        goto *jt1[a % 2];
        l1: ret += 1;
        l2: ret += 3;
        goto *jt2[b % 2];
        l3: ret += 5;
        l4: ret += 7;
        return ret;
      }
```
The final binary:
```
      12:       bf 23 20 00 00 00 00 00 r3 = (s32)r2
      13:       67 03 00 00 03 00 00 00 r3 <<= 0x3
      14:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000070:  R_BPF_64_64  BPF.JT.0.0
      16:       0f 32 00 00 00 00 00 00 r2 += r3
      17:       bf 11 20 00 00 00 00 00 r1 = (s32)r1
      18:       67 01 00 00 03 00 00 00 r1 <<= 0x3
      19:       18 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r3 = 0x0 ll
                0000000000000098:  R_BPF_64_64  BPF.JT.0.1
      21:       0f 13 00 00 00 00 00 00 r3 += r1
    
  [ 4] .jumptables       PROGBITS        0000000000000000 000160 000020 00      0   0  1

     4: 0000000000000000    16 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.0
     5: 0000000000000010    16 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.1
```

A more complicated test with both switch-statement triggered jump table
and compute gotos:

```
$ cat test3.c
struct simple_ctx {
        int x;
        int y;
        int z;
};

int ret_user, ret_user2;
void bar(void);
int foo(struct simple_ctx *ctx, struct simple_ctx *ctx2, int a, int b)
{
        __label__ l1, l2, l3, l4;
        void *jt1[] = {[0]=&&l1, [1]=&&l2};
        void *jt2[] = {[0]=&&l3, [1]=&&l4};
        int ret = 0;

        goto *jt1[a % 2];
        l1: ret += 1;
        l2: ret += 3;
        goto *jt2[b % 2];
        l3: ret += 5;
        l4: ret += 7;

        bar();

        switch (ctx->x) {
        case 1: ret_user = 18; break;
        case 20: ret_user = 6; break;
        case 16: ret_user = 9; break;
        case 6: ret_user = 16; break;
        case 8: ret_user = 14; break;
        case 30: ret_user = 2; break;
        default: ret_user = 1; break;
        }

        return ret;
}
```
Compile with
```
  clang --target=bpf -mcpu=v4 -O2 -S test3.c
  clang --target=bpf -mcpu=v4 -O2 -c test3.c
```
  The binary:
```
     /* For computed goto */
      13:       bf 42 20 00 00 00 00 00 r2 = (s32)r4                                                         
      14:       67 02 00 00 03 00 00 00 r2 <<= 0x3                                                           
      15:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll                                  
                0000000000000078:  R_BPF_64_64  BPF.JT.0.1                                                   
      17:       0f 21 00 00 00 00 00 00 r1 += r2                                                             
      18:       bf 32 20 00 00 00 00 00 r2 = (s32)r3                                                         
      19:       67 02 00 00 03 00 00 00 r2 <<= 0x3                                                           
      20:       18 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r3 = 0x0 ll                                  
                00000000000000a0:  R_BPF_64_64  BPF.JT.0.2                                                   
      22:       0f 23 00 00 00 00 00 00 r3 += r2

      /* For switch statement */
      39:       67 01 00 00 03 00 00 00 r1 <<= 0x3
      40:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000140:  R_BPF_64_64  BPF.JT.0.0
      42:       0f 12 00 00 00 00 00 00 r2 += r1
```
You can see jump table symbols are all different.
2025-09-16 09:27:08 -07:00

212 lines
7.2 KiB
C++

//===-- BPFTargetMachine.cpp - Define TargetMachine for BPF ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements the info about BPF target spec.
//
//===----------------------------------------------------------------------===//
#include "BPFTargetMachine.h"
#include "BPF.h"
#include "BPFTargetLoweringObjectFile.h"
#include "BPFTargetTransformInfo.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <optional>
using namespace llvm;
static cl::
opt<bool> DisableMIPeephole("disable-bpf-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for BPF"));
static cl::opt<bool>
DisableCheckUnreachable("bpf-disable-trap-unreachable", cl::Hidden,
cl::desc("Disable Trap Unreachable for BPF"));
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
// Register the target.
RegisterTargetMachine<BPFTargetMachine> X(getTheBPFleTarget());
RegisterTargetMachine<BPFTargetMachine> Y(getTheBPFbeTarget());
RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeGlobalISel(PR);
initializeBPFAsmPrinterPass(PR);
initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
initializeBPFMIPreEmitPeepholePass(PR);
initializeBPFDAGToDAGISelLegacyPass(PR);
initializeBPFMISimplifyPatchablePass(PR);
initializeBPFMIPreEmitCheckingPass(PR);
}
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::PIC_);
}
BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
: CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(std::make_unique<BPFTargetLoweringObjectFileELF>()),
Subtarget(TT, std::string(CPU), std::string(FS), *this) {
if (!DisableCheckUnreachable) {
this->Options.TrapUnreachable = true;
this->Options.NoTrapAfterNoreturn = true;
}
initAsmInfo();
BPFMCAsmInfo *MAI =
static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo.get()));
MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS());
}
namespace {
// BPF Code Generator Pass Configuration Options.
class BPFPassConfig : public TargetPassConfig {
public:
BPFPassConfig(BPFTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
BPFTargetMachine &getBPFTargetMachine() const {
return getTM<BPFTargetMachine>();
}
void addIRPasses() override;
bool addInstSelector() override;
void addMachineSSAOptimization() override;
void addPreEmitPass() override;
bool addIRTranslator() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
};
}
TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
return new BPFPassConfig(*this, PM);
}
static Expected<bool> parseBPFPreserveStaticOffsetOptions(StringRef Params) {
return PassBuilder::parseSinglePassOption(Params, "allow-partial",
"BPFPreserveStaticOffsetPass");
}
void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
#define GET_PASS_REGISTRY "BPFPassRegistry.def"
#include "llvm/Passes/TargetPassRegistry.inc"
PB.registerPipelineStartEPCallback(
[=](ModulePassManager &MPM, OptimizationLevel) {
FunctionPassManager FPM;
FPM.addPass(BPFPreserveStaticOffsetPass(true));
FPM.addPass(BPFAbstractMemberAccessPass(this));
FPM.addPass(BPFPreserveDITypePass());
FPM.addPass(BPFIRPeepholePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
OptimizationLevel Level) {
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
FPM.addPass(BPFASpaceCastSimplifyPass());
});
PB.registerScalarOptimizerLateEPCallback(
[=](FunctionPassManager &FPM, OptimizationLevel Level) {
// Run this after loop unrolling but before
// SimplifyCFGPass(... .sinkCommonInsts(true))
FPM.addPass(BPFPreserveStaticOffsetPass(false));
});
PB.registerPipelineEarlySimplificationEPCallback(
[=](ModulePassManager &MPM, OptimizationLevel, ThinOrFullLTOPhase) {
MPM.addPass(BPFAdjustOptPass());
});
}
void BPFPassConfig::addIRPasses() {
addPass(createAtomicExpandLegacyPass());
addPass(createBPFCheckAndAdjustIR());
TargetPassConfig::addIRPasses();
}
TargetTransformInfo
BPFTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(std::make_unique<BPFTTIImpl>(this, F));
}
// Install an instruction selector pass using
// the ISelDag to gen BPF code.
bool BPFPassConfig::addInstSelector() {
addPass(createBPFISelDag(getBPFTargetMachine()));
return false;
}
void BPFPassConfig::addMachineSSAOptimization() {
addPass(createBPFMISimplifyPatchablePass());
// The default implementation must be called first as we want eBPF
// Peephole ran at last.
TargetPassConfig::addMachineSSAOptimization();
const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
if (!DisableMIPeephole) {
if (Subtarget->getHasAlu32())
addPass(createBPFMIPeepholePass());
}
}
void BPFPassConfig::addPreEmitPass() {
addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOptLevel::None)
if (!DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
}
bool BPFPassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
bool BPFPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
bool BPFPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
bool BPFPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect(getOptLevel()));
return false;
}