diff --git a/CMakeLists.txt b/CMakeLists.txt
index e7de4f2f..4cb71bdd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,6 +78,7 @@ set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
 set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
 set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
 set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
+set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
 set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
 set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
 set_option(TRACY_FIBERS "Enable fibers support" OFF)
diff --git a/meson.build b/meson.build
index c7e443fa..eee37817 100644
--- a/meson.build
+++ b/meson.build
@@ -64,6 +64,10 @@ if get_option('tracy_no_system_tracing')
   add_project_arguments('-DTRACY_NO_SYSTEM_TRACING', language : 'cpp')
 endif
 
+if get_option('tracy_patchable_nopsleds')
+  add_project_arguments('-DTRACY_PATCHABLE_NOPSLEDS', language : 'cpp')
+endif
+
 if get_option('tracy_delayed_init')
   add_project_arguments('-DTRACY_DELAYED_INIT', language : 'cpp')
 endif
diff --git a/public/client/TracyProfiler.hpp b/public/client/TracyProfiler.hpp
index dbeb35ce..e3b256df 100644
--- a/public/client/TracyProfiler.hpp
+++ b/public/client/TracyProfiler.hpp
@@ -209,7 +209,22 @@ public:
         if( HardwareSupportsInvariantTSC() )
         {
             uint64_t rax, rdx;
+#ifdef TRACY_PATCHABLE_NOPSLEDS
+            // Some external tooling (such as rr) wants to patch our rdtsc and replace it by a
+            // branch to control the external input seen by a program. This kind of patching is
+            // not generally possible depending on the surrounding code and can lead to significant
+            // slowdowns if the compiler generated unlucky code and rr and tracy are used together.
+            // To avoid this, use the rr-safe `nopl 0(%rax, %rax, 1); rdtsc` instruction sequence,
+            // which rr promises will be patchable independent of the surrounding code.
+            asm volatile (
+                    // This is nopl 0(%rax, %rax, 1), but assemblers are inconsistent about whether
+                    // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use
+                    // the 5 byte one.
+                    ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
+                    "rdtsc" : "=a" (rax), "=d" (rdx) );
+#else
             asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
+#endif
             return (int64_t)(( rdx << 32 ) + rax);
         }
 #  else