
Three commits in this pull request: commit 1: implement pattern matching for memory ordering seq_cst, acq_rel, release, acquire and monotonic. Specifically, for monotonic memory ordering (relaxed memory model), if no return value is used, locked insn is used. commit 2: add support to handle dwarf atomic modifier in BTF generation. Actually atomic modifier is ignored in BTF. commit 3: add tests for new atomic ordering support and BTF support with _Atomic type. I removed RFC tag as now patch sets are in reasonable states. For atomic fetch_and_*() operations, do pattern matching with memory ordering seq_cst, acq_rel, release, acquire and monotonic (relaxed). For fetch_and_*() operations with seq_cst/acq_rel/release/acquire ordering, atomic_fetch_*() instructions are generated. For monotonic ordering, locked insns are generated if return value is not used. Otherwise, atomic_fetch_*() insns are used. The main motivation is to resolve the kernel issue [1]. The following memory orderings are supported: seq_cst, acq_rel, release, acquire, relaxed Current gcc style __sync_fetch_and_*() operations are all seq_cst. To use explicit memory ordering, the _Atomic type is needed. 
The following is an example: ``` $ cat test.c \#include <stdatomic.h> void f1(_Atomic int *i) { (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed); } void f2(_Atomic int *i) { (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire); } void f3(_Atomic int *i) { (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst); } $ cat run.sh clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test.c -o test.o && llvm-objdum p -d test.o $ ./run.sh test.o: file format elf64-bpf Disassembly of section .text: 0000000000000000 <f1>: 0: b4 02 00 00 0a 00 00 00 w2 = 0xa 1: c3 21 00 00 50 00 00 00 lock *(u32 *)(r1 + 0x0) &= w2 2: 95 00 00 00 00 00 00 00 exit 0000000000000018 <f2>: 3: b4 02 00 00 0a 00 00 00 w2 = 0xa 4: c3 21 00 00 51 00 00 00 w2 = atomic_fetch_and((u32 *)(r1 + 0x0), w2) 5: 95 00 00 00 00 00 00 00 exit 0000000000000030 <f3>: 6: b4 02 00 00 0a 00 00 00 w2 = 0xa 7: c3 21 00 00 51 00 00 00 w2 = atomic_fetch_and((u32 *)(r1 + 0x0), w2) 8: 95 00 00 00 00 00 00 00 exit ``` The following is another example where return value is used: ``` $ cat test1.c \#include <stdatomic.h> int f1(_Atomic int *i) { return __c11_atomic_fetch_and(i, 10, memory_order_relaxed); } int f2(_Atomic int *i) { return __c11_atomic_fetch_and(i, 10, memory_order_acquire); } int f3(_Atomic int *i) { return __c11_atomic_fetch_and(i, 10, memory_order_seq_cst); } $ cat run.sh clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test1.c -o test1.o && llvm-objdump -d test1.o $ ./run.sh test.o: file format elf64-bpf Disassembly of section .text: 0000000000000000 <f1>: 0: b4 00 00 00 0a 00 00 00 w0 = 0xa 1: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0) 2: 95 00 00 00 00 00 00 00 exit 0000000000000018 <f2>: 3: b4 00 00 00 0a 00 00 00 w0 = 0xa 4: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0) 5: 95 00 00 00 00 00 00 00 exit 0000000000000030 <f3>: 6: b4 00 00 00 0a 00 00 00 w0 = 0xa 7: c3 01 00 00 51 00 00 
00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0) 8: 95 00 00 00 00 00 00 00 exit ``` You can see that for relaxed memory ordering, if return value is used, atomic_fetch_and() insn is used. Otherwise, if return value is not used, locked insn is used. Here is another example with global _Atomic variable: ``` $ cat test3.c \#include <stdatomic.h> _Atomic int i; void f1(void) { (void)__c11_atomic_fetch_and(&i, 10, memory_order_relaxed); } void f2(void) { (void)__c11_atomic_fetch_and(&i, 10, memory_order_seq_cst); } $ cat run.sh clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test3.c -o test3.o && llvm-objdump -d test3.o $ ./run.sh test3.o: file format elf64-bpf Disassembly of section .text: 0000000000000000 <f1>: 0: b4 01 00 00 0a 00 00 00 w1 = 0xa 1: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll 3: c3 12 00 00 50 00 00 00 lock *(u32 *)(r2 + 0x0) &= w1 4: 95 00 00 00 00 00 00 00 exit 0000000000000028 <f2>: 5: b4 01 00 00 0a 00 00 00 w1 = 0xa 6: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll 8: c3 12 00 00 51 00 00 00 w1 = atomic_fetch_and((u32 *)(r2 + 0x0), w1) 9: 95 00 00 00 00 00 00 00 exit ``` Note that in the above compilations, '-g' is not used. The reason is due to the following IR related to _Atomic type: ``` $clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -g -S -emit-llvm test3.c ``` The related debug info for test3.c: ``` !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "i", scope: !2, file: !3, line: 3, type: !16, isLocal: false, isDefinition: true) ... !16 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !17) !17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ``` If compiling test.c, the related debug info: ``` ... 
!19 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 3, type: !20, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !25) !20 = !DISubroutineType(types: !21) !21 = !{null, !22} !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64) !23 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !24) !24 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !25 = !{!26} !26 = !DILocalVariable(name: "i", arg: 1, scope: !19, file: !1, line: 3, type: !22) ``` All the above suggests _Atomic behaves like a modifier (e.g. const, restrict, volatile). This seems true based on doc [1]. Without proper handling of DW_TAG_atomic_type, llvm BTF generation will be incorrect since the current implementation assumes no existence of DW_TAG_atomic_type. So we have two choices here: (1). llvm bpf backend processes DW_TAG_atomic_type but ignores it in BTF encoding. (2). Add another type, e.g., BTF_KIND_ATOMIC to BTF. BTF_KIND_ATOMIC behaves as a modifier like const/volatile/restrict. For choice (1), llvm bpf backend should skip dwarf::DW_TAG_atomic_type during BTF generation whenever necessary. For choice (2), BTF_KIND_ATOMIC will be added to BTF so llvm backend and kernel need to handle that properly. The main advantage of it probably is to maintain this atomic type so it is also available to skeleton. But I think for skeleton a raw type might be good enough unless user space intends to do some atomic operation with that, which is an unlikely case. So I choose choice (1) in this RFC implementation. See the commit message of the second commit for details. [1] https://lore.kernel.org/bpf/7b941f53-2a05-48ec-9032-8f106face3a3@linux.dev/ [2] https://dwarfstd.org/issues/131112.1.html ---------
186 lines
6.4 KiB
C++
186 lines
6.4 KiB
C++
//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass performs checking to signal errors for certain illegal usages at
|
|
// MachineInstruction layer. Specifically, the result of XADD{32,64} insn should
|
|
// not be used. The pass is done at the PreEmit pass right before the
|
|
// machine code is emitted at which point the register liveness information
|
|
// is still available.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BPF.h"
|
|
#include "BPFInstrInfo.h"
|
|
#include "BPFTargetMachine.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/IR/DiagnosticInfo.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "bpf-mi-checking"
|
|
|
|
namespace {
|
|
|
|
struct BPFMIPreEmitChecking : public MachineFunctionPass {
|
|
|
|
static char ID;
|
|
MachineFunction *MF;
|
|
const TargetRegisterInfo *TRI;
|
|
|
|
BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
|
|
initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
private:
|
|
// Initialize class variables.
|
|
void initialize(MachineFunction &MFParm);
|
|
|
|
void processAtomicInsts();
|
|
|
|
public:
|
|
// Main entry point for this pass.
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
|
if (!skipFunction(MF.getFunction())) {
|
|
initialize(MF);
|
|
processAtomicInsts();
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
// Initialize class variables.
|
|
void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
|
|
MF = &MFParm;
|
|
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
|
|
LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
|
|
}
|
|
|
|
// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
|
|
// used.
|
|
//
|
|
// NOTE: BPF backend hasn't enabled sub-register liveness track, so when the
|
|
// source and destination operands of XADD are GPR32, there is no sub-register
|
|
// dead info. If we rely on the generic MachineInstr::allDefsAreDead, then we
|
|
// will raise false alarm on GPR32 Def.
|
|
//
|
|
// To support GPR32 Def, ideally we could just enable sub-register liveness track
|
|
// on BPF backend, then allDefsAreDead could work on GPR32 Def. This requires
|
|
// implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
|
|
//
|
|
// However, sub-register liveness tracking module inside LLVM is actually
|
|
// designed for the situation where one register could be split into more than
|
|
// one sub-registers for which case each sub-register could have their own
|
|
// liveness and kill one of them doesn't kill others. So, tracking liveness for
|
|
// each makes sense.
|
|
//
|
|
// For BPF, each 64-bit register could only have one 32-bit sub-register. This
|
|
// is exactly the case which LLVM think brings no benefits for doing
|
|
// sub-register tracking, because the live range of sub-register must always
|
|
// equal to its parent register, therefore liveness tracking is disabled even
|
|
// the back-end has implemented enableSubRegLiveness. The detailed information
|
|
// is at r232695:
|
|
//
|
|
// Author: Matthias Braun <matze@braunis.de>
|
|
// Date: Thu Mar 19 00:21:58 2015 +0000
|
|
// Do not track subregister liveness when it brings no benefits
|
|
//
|
|
// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given the solo
|
|
// sub-register always has the same liveness as its parent register, LLVM is
|
|
// already attaching an implicit 64-bit register Def whenever there is
|
|
// a sub-register Def. The liveness of the implicit 64-bit Def is available.
|
|
// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info could
|
|
// be:
|
|
//
|
|
// $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
|
|
// implicit killed $r9, implicit-def dead $r9
|
|
//
|
|
// Even though w9 is not marked as Dead, the parent register r9 is marked as
|
|
// Dead correctly, and it is safe to use such information for our purpose.
|
|
static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
|
|
const MCRegisterClass *GPR64RegClass =
|
|
&BPFMCRegisterClasses[BPF::GPRRegClassID];
|
|
std::vector<unsigned> GPR32LiveDefs;
|
|
std::vector<unsigned> GPR64DeadDefs;
|
|
|
|
for (const MachineOperand &MO : MI.operands()) {
|
|
bool RegIsGPR64;
|
|
|
|
if (!MO.isReg() || MO.isUse())
|
|
continue;
|
|
|
|
RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
|
|
if (!MO.isDead()) {
|
|
// It is a GPR64 live Def, we are sure it is live.
|
|
if (RegIsGPR64)
|
|
return true;
|
|
// It is a GPR32 live Def, we are unsure whether it is really dead due to
|
|
// no sub-register liveness tracking. Push it to vector for deferred
|
|
// check.
|
|
GPR32LiveDefs.push_back(MO.getReg());
|
|
continue;
|
|
}
|
|
|
|
// Record any GPR64 dead Def as some unmarked GPR32 could be alias of its
|
|
// low 32-bit.
|
|
if (RegIsGPR64)
|
|
GPR64DeadDefs.push_back(MO.getReg());
|
|
}
|
|
|
|
// No GPR32 live Def, safe to return false.
|
|
if (GPR32LiveDefs.empty())
|
|
return false;
|
|
|
|
// No GPR64 dead Def, so all those GPR32 live Def can't have alias, therefore
|
|
// must be truely live, safe to return true.
|
|
if (GPR64DeadDefs.empty())
|
|
return true;
|
|
|
|
// Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
|
|
for (auto I : GPR32LiveDefs)
|
|
for (MCPhysReg SR : TRI->superregs(I))
|
|
if (!llvm::is_contained(GPR64DeadDefs, SR))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
void BPFMIPreEmitChecking::processAtomicInsts() {
|
|
if (MF->getSubtarget<BPFSubtarget>().getHasJmp32())
|
|
return;
|
|
|
|
// Only check for cpu version 1 and 2.
|
|
for (MachineBasicBlock &MBB : *MF) {
|
|
for (MachineInstr &MI : MBB) {
|
|
if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD)
|
|
continue;
|
|
|
|
LLVM_DEBUG(MI.dump());
|
|
if (hasLiveDefs(MI, TRI)) {
|
|
DebugLoc Empty;
|
|
const DebugLoc &DL = MI.getDebugLoc();
|
|
const Function &F = MF->getFunction();
|
|
F.getContext().diagnose(DiagnosticInfoUnsupported{
|
|
F, "Invalid usage of the XADD return value", DL});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Register the pass with the legacy pass manager.
// NOTE(review): the CLI name "bpf-mi-pemit-checking" looks like a typo for
// "preemit", but it is the externally registered pass name -- confirm with
// users of -run-pass before renaming.
INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
                "BPF PreEmit Checking", false, false)

char BPFMIPreEmitChecking::ID = 0;

// Factory function used by the BPF target to add this pass to the codegen
// pipeline.
FunctionPass *llvm::createBPFMIPreEmitCheckingPass() {
  return new BPFMIPreEmitChecking();
}
|