From bf50489eeba2126c526c8d57f73448fa48429d3f Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Tue, 31 Mar 2026 16:27:58 -0700 Subject: [PATCH] [Pseudoprobe][MachO] Enable pseudo probes emission for MachO (#185758) Enable pseudo probes emission for MachO. Due to the 16 character limit of MachO segment and section names, the file sections will be `__PSEUDO_PROBE,__probes` and `__PSEUDO_PROBE,__probe_descs`. --- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 2 + llvm/lib/MC/MCObjectFileInfo.cpp | 10 ++ .../Target/AArch64/AArch64TargetMachine.cpp | 3 + llvm/test/CodeGen/AArch64/O0-pipeline.ll | 1 + llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + .../SampleProfile/pseudo-probe-emit-macho.ll | 136 ++++++++++++++++++ 6 files changed, 153 insertions(+) create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-emit-macho.ll diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 40ddbca84b11..da1e030c1222 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1306,6 +1306,8 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, // Emit the linker options if present. 
emitLinkerDirectives(Streamer, M); + emitPseudoProbeDescMetadata(Streamer, M); + unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; StringRef SectionVal; diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index f2917179e1fc..56a22241ae5d 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCSymbolGOFF.h" +#include "llvm/MC/SectionKind.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; @@ -320,6 +321,15 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { RemarksSection = Ctx->getMachOSection( "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + PseudoProbeSection = + Ctx->getMachOSection("__PSEUDO_PROBE", "__probes", + MachO::S_ATTR_DEBUG | MachO::S_ATTR_NO_DEAD_STRIP, + SectionKind::getMetadata()); + PseudoProbeDescSection = + Ctx->getMachOSection("__PSEUDO_PROBE", "__probe_descs", + MachO::S_ATTR_DEBUG | MachO::S_ATTR_NO_DEAD_STRIP, + SectionKind::getMetadata()); + // The architecture of dsymutil makes it very difficult to copy the Swift // reflection metadata sections into the __TEXT segment, so dsymutil creates // these sections in the __DWARF segment instead. diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index b8c2c96301b6..e491cbbde415 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -939,6 +939,9 @@ void AArch64PassConfig::addPostBBSections() { } void AArch64PassConfig::addPreEmitPass2() { + // Insert pseudo probe annotation for callsite profiling + addPass(createPseudoProbeInserter()); + // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo // instructions are lowered to bundles as well. 
addPass(createUnpackMachineBundlesLegacy(nullptr)); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index 9f9e47865c1b..32dbe49df0c1 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -88,6 +88,7 @@ ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Stack Frame Layout Analysis +; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 374b0fe10c00..16b33376fa84 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -265,6 +265,7 @@ ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Stack Frame Layout Analysis +; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-macho.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-macho.ll new file mode 100644 index 000000000000..3a9bd93a98ad --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-macho.ll @@ -0,0 +1,136 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt < %s -passes=pseudo-probe -S -o %t +; RUN: FileCheck %s < %t --check-prefix=CHECK-IL +; RUN: llc %t -mtriple=arm64-apple-darwin -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR + +; MachO +; RUN: llc %t -function-sections -mtriple=arm64-apple-darwin -filetype=asm -o %t1 +; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM-MACHO +; RUN: llc %t -function-sections -mtriple=arm64-apple-darwin -filetype=obj -o 
%t2 +; RUN: llvm-readobj -Ss %t2 | FileCheck %s --check-prefix=CHECK-SEC-MACHO +; RUN: llvm-mc %t1 -triple=arm64-apple-darwin -filetype=obj -o %t3 +; RUN: llvm-readobj -Ss %t3 | FileCheck %s --check-prefix=CHECK-SEC-MACHO-MC + +@a = dso_local global i32 0, align 4 + +define void @foo(i32 %x) !dbg !3 { +bb0: + %cmp = icmp eq i32 %x, 0 +; CHECK-IL-LABEL: void @foo(i32 %x) !dbg ![[#]] { +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]] +; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID:]] 1 0 0 _foo + br i1 %cmp, label %bb1, label %bb2 + +bb1: +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]] +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0 +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID]] 3 0 0 _foo +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID]] 4 0 0 _foo + store i32 6, ptr @a, align 4 + br label %bb3 + +bb2: +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]] +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0 +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID]] 2 0 0 _foo +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID]] 4 0 0 _foo + store i32 8, ptr @a, align 4 + br label %bb3 + +bb3: +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]] + ret void, !dbg !12 +} + +declare void @bar(i32 %x) + +define internal void @foo2(ptr %f) !dbg !4 { +entry: +; CHECK-IL-LABEL: void @foo2(ptr %f) !dbg ![[#]] { +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) +; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0 +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID2:]] 1 0 0 _foo2 +; Check pseudo_probe metadata attached to the indirect call instruction. 
+; CHECK-IL: call void %f(i32 1), !dbg ![[#PROBE0:]] +; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 2, 1, 0 +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID2]] 2 1 0 _foo2 + call void %f(i32 1), !dbg !13 +; Check pseudo_probe metadata attached to the direct call instruction. +; CHECK-IL: call void @bar(i32 1), !dbg ![[#PROBE1:]] +; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 3, 2, 0 +; CHECK-ASM-MACHO: .pseudoprobe [[#GUID2]] 3 2 0 _foo2 + call void @bar(i32 1) + ret void +} + +; CHECK-IL: Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +; CHECK-IL-NEXT: declare void @llvm.pseudoprobe(i64, i64, i32, i64) + +; CHECK-IL: ![[#FOO:]] = distinct !DISubprogram(name: "foo" +; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]]) +; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#DISC0:]]) +; CHECK-IL: ![[#DISC0]] = !DILexicalBlockFile(scope: ![[#FOO]], file: ![[#]], discriminator: 0) +; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]]) +;; A discriminator of 387973143 which is 0x17200017 in hexadecimal, stands for an indirect call probe +;; with an index of 2. +; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 387973143) +; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) +;; A discriminator of 455082015 which is 0x1b20001f in hexadecimal, stands for a direct call probe +;; with an index of 3. 
+; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 455082015) + +; Check the generation of pseudo_probe_desc section for MachO +; CHECK-ASM-MACHO: .section __PSEUDO_PROBE,__probe_descs,regular,no_dead_strip+debug +; CHECK-ASM-MACHO-NEXT: .quad [[#GUID]] +; CHECK-ASM-MACHO-NEXT: .quad [[#HASH:]] +; CHECK-ASM-MACHO-NEXT: .byte 3 +; CHECK-ASM-MACHO-NEXT: .ascii "foo" +; CHECK-ASM-MACHO-NEXT: .quad [[#GUID2]] +; CHECK-ASM-MACHO-NEXT: .quad [[#HASH2:]] +; CHECK-ASM-MACHO-NEXT: .byte 4 +; CHECK-ASM-MACHO-NEXT: .ascii "foo2" + +; CHECK-SEC-MACHO-LABEL: Sections [ +; CHECK-SEC-MACHO: Name: __probe_descs +; CHECK-SEC-MACHO-NEXT: Segment: __PSEUDO_PROBE +; CHECK-SEC-MACHO: Attributes [ (0x120000) +; CHECK-SEC-MACHO-NEXT: Debug (0x20000) +; CHECK-SEC-MACHO-NEXT: NoDeadStrip (0x100000) +; CHECK-SEC-MACHO: Name: __probes +; CHECK-SEC-MACHO-NEXT: Segment: __PSEUDO_PROBE +; CHECK-SEC-MACHO: Attributes [ (0x120000) +; CHECK-SEC-MACHO-NEXT: Debug (0x20000) +; CHECK-SEC-MACHO-NEXT: NoDeadStrip (0x100000) + +; CHECK-SEC-MACHO-MC-LABEL: Sections [ +; CHECK-SEC-MACHO-MC: Name: __probe_descs +; CHECK-SEC-MACHO-MC-NEXT: Segment: __PSEUDO_PROBE +; CHECK-SEC-MACHO-MC: Attributes [ (0x120000) +; CHECK-SEC-MACHO-MC-NEXT: Debug (0x20000) +; CHECK-SEC-MACHO-MC-NEXT: NoDeadStrip (0x100000) +; CHECK-SEC-MACHO-MC: Name: __probes +; CHECK-SEC-MACHO-MC-NEXT: Segment: __PSEUDO_PROBE +; CHECK-SEC-MACHO-MC: Attributes [ (0x120000) +; CHECK-SEC-MACHO-MC-NEXT: Debug (0x20000) +; CHECK-SEC-MACHO-MC-NEXT: NoDeadStrip (0x100000) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!3 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, unit: !0, retainedNodes: !2) +!4 = distinct !DISubprogram(name: "foo2", scope: !1, file: !1, line: 2, type: !5, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = 
!{!7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{!"clang version 3.9.0"} +!12 = !DILocation(line: 2, scope: !14) +!13 = !DILocation(line: 2, column: 20, scope: !4) +!14 = !DILexicalBlockFile(scope: !3, file: !1, discriminator: 1)