
As discussed in [1], introduce BPF instructions with load-acquire and
store-release semantics under -mcpu=v4. Define 2 new flags:

  BPF_LOAD_ACQ  0x100
  BPF_STORE_REL 0x110

A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_LOAD_ACQ (0x100). Similarly, a "store-release" is a
BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to
BPF_STORE_REL (0x110).

Unlike existing atomic read-modify-write operations that only support
BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and
store-releases also support BPF_B (8-bit) and BPF_H (16-bit). An 8- or
16-bit load-acquire zero-extends the value before writing it to a
32-bit register, just like ARM64 instruction LDAPRH and friends.

As an example (assuming little-endian):

  long foo(long *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }

foo() can be compiled to:

  db 10 00 00 00 01 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
  95 00 00 00 00 00 00 00  exit

  opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
  imm (0x00000100): BPF_LOAD_ACQ

Similarly:

  void bar(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }

bar() can be compiled to:

  cb 21 00 00 10 01 00 00  store_release((u16 *)(r1 + 0x0), w2)
  95 00 00 00 00 00 00 00  exit

  opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
  imm (0x00000110): BPF_STORE_REL

Inline assembly is also supported. Add a pre-defined macro,
__BPF_FEATURE_LOAD_ACQ_STORE_REL, to let developers detect this new
feature. It can also be disabled using a new llc option,
-disable-load-acq-store-rel.

Using __ATOMIC_RELAXED for __atomic_store{,_n}() will generate a
"plain" store (BPF_MEM | BPF_STX) instruction:

  void foo(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELAXED);
  }

  6b 21 00 00 00 00 00 00  *(u16 *)(r1 + 0x0) = w2
  95 00 00 00 00 00 00 00  exit

Similarly, using __ATOMIC_RELAXED for __atomic_load{,_n}() will
generate a zero-extending, "plain" load (BPF_MEM | BPF_LDX)
instruction:

  int foo(char *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_RELAXED);
  }

  71 11 00 00 00 00 00 00  w1 = *(u8 *)(r1 + 0x0)
  bc 10 08 00 00 00 00 00  w0 = (s8)w1
  95 00 00 00 00 00 00 00  exit

Currently, __ATOMIC_CONSUME is an alias for __ATOMIC_ACQUIRE. Using
__ATOMIC_SEQ_CST ("sequentially consistent") is not supported yet and
will cause an error:

  $ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
  bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store
  is not supported
    1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
      |     ^
  ...

Finally, rename those isST*() and isLD*() helper functions in
BPFMISimplifyPatchable.cpp based on what the instructions actually do,
rather than their instruction class.

[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
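
For instance, the pre-defined macro and the inline-assembly support can
be combined along the following lines. This is only a sketch, not code
from this change: the load_acquire/store_release mnemonics match the
disassembly shown above, but the wrapper names (load_acquire_i64,
store_release_i64) and the exact operand-constraint choices are
illustrative assumptions.

  /* Fail the build early if the compiler lacks the new feature. */
  #ifndef __BPF_FEATURE_LOAD_ACQ_STORE_REL
  #error "need clang with -mcpu=v4 load-acquire/store-release support"
  #endif

  /* Illustrative wrappers (not part of this change): emit the new
   * instructions directly via inline assembly, reusing the mnemonics
   * from the disassembly above. */
  static inline long load_acquire_i64(const long *p)
  {
      long ret;

      asm volatile ("%[ret] = load_acquire((u64 *)(%[p] + 0))"
                    : [ret] "=r"(ret)
                    : [p] "r"(p)
                    : "memory");
      return ret;
  }

  static inline void store_release_i64(long *p, long v)
  {
      asm volatile ("store_release((u64 *)(%[p] + 0), %[v])"
                    :
                    : [p] "r"(p), [v] "r"(v)
                    : "memory");
  }

If the feature is present, these should lower to the 0xdb encodings
shown above; if not, the #error makes the missing support obvious at
build time.
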
; RUN: llc < %s -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
; RUN:   | FileCheck -check-prefixes=CHECK-LE %s
; RUN: llc < %s -march=bpfeb -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
; RUN:   | FileCheck -check-prefixes=CHECK-BE %s

; Source:
; void atomic_load_i8(char *p) {
;   (void)__atomic_load_n(p, __ATOMIC_RELAXED);
;   (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
; }
; void atomic_load_i16(short *p) {
;   (void)__atomic_load_n(p, __ATOMIC_RELAXED);
;   (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
; }
; void atomic_load_i32(int *p) {
;   (void)__atomic_load_n(p, __ATOMIC_RELAXED);
;   (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
; }
; void atomic_load_i64(long *p) {
;   (void)__atomic_load_n(p, __ATOMIC_RELAXED);
;   (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
; }
; void atomic_store_i8(char *p, char v) {
;   __atomic_store_n(p, v, __ATOMIC_RELAXED);
;   __atomic_store_n(p, v, __ATOMIC_RELEASE);
; }
; void atomic_store_i16(short *p, short v) {
;   __atomic_store_n(p, v, __ATOMIC_RELAXED);
;   __atomic_store_n(p, v, __ATOMIC_RELEASE);
; }
; void atomic_store_i32(int *p, int v) {
;   __atomic_store_n(p, v, __ATOMIC_RELAXED);
;   __atomic_store_n(p, v, __ATOMIC_RELEASE);
; }
; void atomic_store_i64(long *p, long v) {
;   __atomic_store_n(p, v, __ATOMIC_RELAXED);
;   __atomic_store_n(p, v, __ATOMIC_RELEASE);
; }

define dso_local void @atomic_load_i8(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_load_i8
; CHECK-LE: w2 = *(u8 *)(r1 + 0) # encoding: [0x71,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: w1 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
;
; CHECK-BE: w2 = *(u8 *)(r1 + 0) # encoding: [0x71,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: w1 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
entry:
  %0 = load atomic i8, ptr %p monotonic, align 1
  %1 = load atomic i8, ptr %p acquire, align 1
  ret void
}

define dso_local void @atomic_load_i16(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_load_i16
; CHECK-LE: w2 = *(u16 *)(r1 + 0) # encoding: [0x69,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: w1 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
;
; CHECK-BE: w2 = *(u16 *)(r1 + 0) # encoding: [0x69,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: w1 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
entry:
  %0 = load atomic i16, ptr %p monotonic, align 2
  %1 = load atomic i16, ptr %p acquire, align 2
  ret void
}

define dso_local void @atomic_load_i32(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_load_i32
; CHECK-LE: w2 = *(u32 *)(r1 + 0) # encoding: [0x61,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: w1 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
;
; CHECK-BE: w2 = *(u32 *)(r1 + 0) # encoding: [0x61,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: w1 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
entry:
  %0 = load atomic i32, ptr %p monotonic, align 4
  %1 = load atomic i32, ptr %p acquire, align 4
  ret void
}

define dso_local void @atomic_load_i64(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_load_i64
; CHECK-LE: r2 = *(u64 *)(r1 + 0) # encoding: [0x79,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: r1 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
;
; CHECK-BE: r2 = *(u64 *)(r1 + 0) # encoding: [0x79,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: r1 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
entry:
  %0 = load atomic i64, ptr %p monotonic, align 8
  %1 = load atomic i64, ptr %p acquire, align 8
  ret void
}

define dso_local void @atomic_store_i8(ptr nocapture noundef writeonly %p, i8 noundef signext %v) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_store_i8
; CHECK-LE: *(u8 *)(r1 + 0) = w2 # encoding: [0x73,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
;
; CHECK-BE: *(u8 *)(r1 + 0) = w2 # encoding: [0x73,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
entry:
  store atomic i8 %v, ptr %p monotonic, align 1
  store atomic i8 %v, ptr %p release, align 1
  ret void
}

define dso_local void @atomic_store_i16(ptr nocapture noundef writeonly %p, i16 noundef signext %v) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_store_i16
; CHECK-LE: *(u16 *)(r1 + 0) = w2 # encoding: [0x6b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
;
; CHECK-BE: *(u16 *)(r1 + 0) = w2 # encoding: [0x6b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
entry:
  store atomic i16 %v, ptr %p monotonic, align 2
  store atomic i16 %v, ptr %p release, align 2
  ret void
}

define dso_local void @atomic_store_i32(ptr nocapture noundef writeonly %p, i32 noundef %v) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_store_i32
; CHECK-LE: *(u32 *)(r1 + 0) = w2 # encoding: [0x63,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
;
; CHECK-BE: *(u32 *)(r1 + 0) = w2 # encoding: [0x63,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
entry:
  store atomic i32 %v, ptr %p monotonic, align 4
  store atomic i32 %v, ptr %p release, align 4
  ret void
}

define dso_local void @atomic_store_i64(ptr nocapture noundef writeonly %p, i64 noundef %v) local_unnamed_addr #0 {
; CHECK-LABEL: atomic_store_i64
; CHECK-LE: *(u64 *)(r1 + 0) = r2 # encoding: [0x7b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-LE-NEXT: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
;
; CHECK-BE: *(u64 *)(r1 + 0) = r2 # encoding: [0x7b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
; CHECK-BE-NEXT: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
entry:
  store atomic i64 %v, ptr %p monotonic, align 8
  store atomic i64 %v, ptr %p release, align 8
  ret void
}
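
; A minimal stand-in attribute group so this excerpt parses standalone;
; the original file's full attribute list is not shown here.
attributes #0 = { nounwind }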