From 1c1faeff2d51f16eb6389ee83e9445df349b1685 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Thu, 26 Sep 2024 14:38:11 +0200 Subject: [PATCH] Replace parallel STL with PPQSort. PPQSort is supposedly quite fast: https://github.com/GabTux/PPQSort More importantly, it does not depend on TBB fuckery, so there's no longer a need to link with an external library that people may or may not have. The NO_PARALLEL_STL option is out, as it was provided solely to deal with TBB being not available. Sequential sorting is still used on emscripten. --- .github/workflows/linux.yml | 2 +- capture/CMakeLists.txt | 1 - cmake/server.cmake | 6 +--- cmake/vendor.cmake | 35 ++++--------------- csvexport/CMakeLists.txt | 1 - import/CMakeLists.txt | 1 - manual/tracy.tex | 2 -- profiler/CMakeLists.txt | 1 - profiler/src/profiler/TracySourceView.cpp | 2 +- .../src/profiler/TracyTimelineItemPlot.cpp | 1 + .../src/profiler/TracyView_Annotations.cpp | 1 + .../src/profiler/TracyView_ContextSwitch.cpp | 1 + profiler/src/profiler/TracyView_CpuData.cpp | 1 + profiler/src/profiler/TracyView_FindZone.cpp | 6 ++-- profiler/src/profiler/TracyView_FrameTree.cpp | 1 + profiler/src/profiler/TracyView_Memory.cpp | 1 + profiler/src/profiler/TracyView_Samples.cpp | 1 + profiler/src/profiler/TracyView_ZoneInfo.cpp | 1 + server/TracySort.hpp | 17 +++------ server/TracySortedVector.hpp | 6 ++-- server/TracyWorker.cpp | 24 +++++++------ update/CMakeLists.txt | 1 - 22 files changed, 42 insertions(+), 71 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 511db29d..ad433dc7 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -12,7 +12,7 @@ jobs: container: archlinux:base-devel steps: - name: Install dependencies - run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs + run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed 
freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs - name: Trust git repo run: git config --global --add safe.directory '*' - uses: actions/checkout@v4 diff --git a/capture/CMakeLists.txt b/capture/CMakeLists.txt index 2417c781..97cb8e0c 100644 --- a/capture/CMakeLists.txt +++ b/capture/CMakeLists.txt @@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_STATISTICS "Disable calculation of statistics" ON) -option(NO_PARALLEL_STL "Disable parallel STL" OFF) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) diff --git a/cmake/server.cmake b/cmake/server.cmake index 3a9eac01..12985c56 100644 --- a/cmake/server.cmake +++ b/cmake/server.cmake @@ -29,11 +29,7 @@ list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/") add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES}) target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR}) -target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd) +target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd PPQSort::PPQSort) if(NO_STATISTICS) target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS) endif() - -if(NOT NO_PARALLEL_STL AND UNIX AND NOT APPLE AND NOT EMSCRIPTEN) - target_link_libraries(TracyServer PRIVATE TracyTbb) -endif() diff --git a/cmake/vendor.cmake b/cmake/vendor.cmake index 30ce0057..50cbf294 100644 --- a/cmake/vendor.cmake +++ b/cmake/vendor.cmake @@ -214,32 +214,11 @@ if (NOT NO_FILESELECTOR AND NOT EMSCRIPTEN) endif() endif() -# TBB -if (NO_PARALLEL_STL) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_PARALLEL_SORT") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_PARALLEL_SORT") -else() - if (UNIX AND NOT APPLE AND NOT EMSCRIPTEN) - # Tracy does not use TBB directly, but the implementation of parallel algorithms - # in some versions of 
libstdc++ depends on TBB. When it does, you must - # explicitly link against -ltbb. - # - # Some distributions have pgk-config files for TBB, others don't. +# PPQSort - pkg_check_modules(TBB tbb) - if (TBB_FOUND) - add_library(TracyTbb INTERFACE) - target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS}) - target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES}) - else() - CPMAddPackage( - NAME tbb - GITHUB_REPOSITORY oneapi-src/oneTBB - GIT_TAG v2021.12.0-rc2 - OPTIONS "TBB_TEST OFF" - ) - add_library(TracyTbb INTERFACE) - target_link_libraries(TracyTbb INTERFACE tbb) - endif() - endif() -endif() +CPMAddPackage( + NAME PPQSort + GITHUB_REPOSITORY GabTux/PPQSort + VERSION 1.0.3 + EXCLUDE_FROM_ALL TRUE +) diff --git a/csvexport/CMakeLists.txt b/csvexport/CMakeLists.txt index a69c03a8..90c485a9 100644 --- a/csvexport/CMakeLists.txt +++ b/csvexport/CMakeLists.txt @@ -1,7 +1,6 @@ cmake_minimum_required(VERSION 3.16) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) -option(NO_PARALLEL_STL "Disable parallel STL" OFF) set(NO_STATISTICS OFF) diff --git a/import/CMakeLists.txt b/import/CMakeLists.txt index fec2c7d0..a532ca62 100644 --- a/import/CMakeLists.txt +++ b/import/CMakeLists.txt @@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_STATISTICS "Disable calculation of statistics" ON) -option(NO_PARALLEL_STL "Disable parallel STL" OFF) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) diff --git a/manual/tracy.tex b/manual/tracy.tex index 0946692f..2d390abd 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -837,8 +837,6 @@ There is no need to install external libraries (e.g. with vcpkg). All libraries On Unix systems (including Linux), you will need to install the \texttt{pkg-config} utility to provide information about libraries. 
-Due to some questionable design decisions by the compiler developers, you will most likely also need the \texttt{tbb} library\footnote{Technically, this is not a Tracy dependency, but rather a \texttt{libstdc++} dependency, but it may still not be installed by default.}. If not found, this library is downloaded automatically. - Installation of the libraries on OSX can be facilitated using the \texttt{brew} package manager. \paragraph{Linux} diff --git a/profiler/CMakeLists.txt b/profiler/CMakeLists.txt index f6afbe0d..fdf60adc 100644 --- a/profiler/CMakeLists.txt +++ b/profiler/CMakeLists.txt @@ -6,7 +6,6 @@ option(LEGACY "Instead of Wayland, use the legacy X11 backend on Linux" OFF) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_STATISTICS "Disable calculation of statistics" OFF) option(SELF_PROFILE "Enable self-profiling" OFF) -option(NO_PARALLEL_STL "Disable parallel STL" OFF) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) diff --git a/profiler/src/profiler/TracySourceView.cpp b/profiler/src/profiler/TracySourceView.cpp index 10d75b50..9acaf153 100644 --- a/profiler/src/profiler/TracySourceView.cpp +++ b/profiler/src/profiler/TracySourceView.cpp @@ -13,11 +13,11 @@ #include "TracyImGui.hpp" #include "TracyMicroArchitecture.hpp" #include "TracyPrint.hpp" -#include "TracySort.hpp" #include "TracySourceView.hpp" #include "TracyUtility.hpp" #include "TracyView.hpp" #include "TracyWorker.hpp" +#include "tracy_pdqsort.h" #include "IconsFontAwesome6.h" diff --git a/profiler/src/profiler/TracyTimelineItemPlot.cpp b/profiler/src/profiler/TracyTimelineItemPlot.cpp index cd49fffd..bb328726 100644 --- a/profiler/src/profiler/TracyTimelineItemPlot.cpp +++ b/profiler/src/profiler/TracyTimelineItemPlot.cpp @@ -5,6 +5,7 @@ #include "TracyUtility.hpp" #include "TracyView.hpp" #include "TracyWorker.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git 
a/profiler/src/profiler/TracyView_Annotations.cpp b/profiler/src/profiler/TracyView_Annotations.cpp index 85aee88e..67e8b291 100644 --- a/profiler/src/profiler/TracyView_Annotations.cpp +++ b/profiler/src/profiler/TracyView_Annotations.cpp @@ -1,6 +1,7 @@ #include "TracyImGui.hpp" #include "TracyPrint.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git a/profiler/src/profiler/TracyView_ContextSwitch.cpp b/profiler/src/profiler/TracyView_ContextSwitch.cpp index d6334eab..a1de9c13 100644 --- a/profiler/src/profiler/TracyView_ContextSwitch.cpp +++ b/profiler/src/profiler/TracyView_ContextSwitch.cpp @@ -6,6 +6,7 @@ #include "TracyTimelineContext.hpp" #include "TracyTimelineDraw.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git a/profiler/src/profiler/TracyView_CpuData.cpp b/profiler/src/profiler/TracyView_CpuData.cpp index 8d94a62e..f2952111 100644 --- a/profiler/src/profiler/TracyView_CpuData.cpp +++ b/profiler/src/profiler/TracyView_CpuData.cpp @@ -8,6 +8,7 @@ #include "TracyTimelineItem.hpp" #include "TracyTimelineContext.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" constexpr float MinVisSize = 3; diff --git a/profiler/src/profiler/TracyView_FindZone.cpp b/profiler/src/profiler/TracyView_FindZone.cpp index c1eb8d6c..153b5d00 100644 --- a/profiler/src/profiler/TracyView_FindZone.cpp +++ b/profiler/src/profiler/TracyView_FindZone.cpp @@ -7,7 +7,9 @@ #include "TracyImGui.hpp" #include "TracyMouse.hpp" #include "TracyPrint.hpp" +#include "TracySort.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { @@ -507,10 +509,10 @@ void View::DrawFindZone() } } auto mid = vec.begin() + vszorig; -#ifdef NO_PARALLEL_SORT +#ifdef __EMSCRIPTEN__ pdqsort_branchless( mid, vec.end() ); #else - std::sort( std::execution::par_unseq, mid, vec.end() ); + ppqsort::sort( ppqsort::execution::par, mid, vec.end() ); #endif std::inplace_merge( vec.begin(), mid, vec.end() ); diff --git 
a/profiler/src/profiler/TracyView_FrameTree.cpp b/profiler/src/profiler/TracyView_FrameTree.cpp index 710aaa62..b4571d0e 100644 --- a/profiler/src/profiler/TracyView_FrameTree.cpp +++ b/profiler/src/profiler/TracyView_FrameTree.cpp @@ -1,6 +1,7 @@ #include "TracyImGui.hpp" #include "TracyPrint.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git a/profiler/src/profiler/TracyView_Memory.cpp b/profiler/src/profiler/TracyView_Memory.cpp index 4652eb25..bb53131a 100644 --- a/profiler/src/profiler/TracyView_Memory.cpp +++ b/profiler/src/profiler/TracyView_Memory.cpp @@ -4,6 +4,7 @@ #include "TracyMouse.hpp" #include "TracyPrint.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git a/profiler/src/profiler/TracyView_Samples.cpp b/profiler/src/profiler/TracyView_Samples.cpp index 4bbd7112..f487d8a7 100644 --- a/profiler/src/profiler/TracyView_Samples.cpp +++ b/profiler/src/profiler/TracyView_Samples.cpp @@ -9,6 +9,7 @@ #include "TracyTimelineContext.hpp" #include "TracyTimelineDraw.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git a/profiler/src/profiler/TracyView_ZoneInfo.cpp b/profiler/src/profiler/TracyView_ZoneInfo.cpp index 7f68f7d1..6a788020 100644 --- a/profiler/src/profiler/TracyView_ZoneInfo.cpp +++ b/profiler/src/profiler/TracyView_ZoneInfo.cpp @@ -5,6 +5,7 @@ #include "TracyPrint.hpp" #include "TracyMouse.hpp" #include "TracyView.hpp" +#include "tracy_pdqsort.h" namespace tracy { diff --git a/server/TracySort.hpp b/server/TracySort.hpp index fce0c0a7..ae50f60a 100644 --- a/server/TracySort.hpp +++ b/server/TracySort.hpp @@ -1,19 +1,10 @@ #ifndef __TRACYSORT_HPP__ #define __TRACYSORT_HPP__ -#ifndef NO_PARALLEL_SORT -# if !defined __APPLE__ && !defined __EMSCRIPTEN__ && ( ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L ) -# if __has_include(<execution>) -# include <algorithm> -# include <execution> -# else -# define 
NO_PARALLEL_SORT -# endif -# else -# define NO_PARALLEL_SORT -# endif +#ifdef __EMSCRIPTEN__ +# include "tracy_pdqsort.h" +#else +# include <ppqsort.h> #endif -#include "tracy_pdqsort.h" - #endif diff --git a/server/TracySortedVector.hpp b/server/TracySortedVector.hpp index f157198f..1a6a6da5 100644 --- a/server/TracySortedVector.hpp +++ b/server/TracySortedVector.hpp @@ -102,10 +102,10 @@ public: const auto se = sb + sortedEnd; const auto sl = se - 1; const auto ue = v.end(); -#ifdef NO_PARALLEL_SORT - pdqsort_branchless( se, ue, comp ); +#ifdef __EMSCRIPTEN__ + pdqsort_branchless( se, ue, comp ); #else - std::sort( std::execution::par_unseq, se, ue, comp ); + ppqsort::sort( ppqsort::execution::par, se, ue, comp ); #endif const auto ss = std::lower_bound( sb, se, *se, comp ); const auto uu = std::lower_bound( se, ue, *sl, comp ); diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 20b52c54..ecc93fd7 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -37,6 +37,7 @@ #include "TracySort.hpp" #include "TracyTaskDispatch.hpp" #include "TracyWorker.hpp" +#include "tracy_pdqsort.h" namespace tracy { @@ -1552,12 +1553,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow m_data.symbolLoc[symIdx++] = SymbolLocation { symAddr, size.Val() }; } } -#ifdef NO_PARALLEL_SORT + +#ifdef __EMSCRIPTEN__ pdqsort_branchless( m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); pdqsort_branchless( m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() ); #else - std::sort( std::execution::par_unseq, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); - std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() ); + ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); + ppqsort::sort( 
ppqsort::execution::par, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() ); #endif f.Read( sz ); @@ -4155,10 +4157,10 @@ void Worker::DoPostponedSymbols() { if( m_data.newSymbolsIndex >= 0 ) { -#ifdef NO_PARALLEL_SORT +#ifdef __EMSCRIPTEN__ pdqsort_branchless( m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); #else - std::sort( std::execution::par_unseq, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); + ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); #endif const auto ms = std::lower_bound( m_data.symbolLoc.begin(), m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc[m_data.newSymbolsIndex], [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); std::inplace_merge( ms, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); @@ -4170,10 +4172,10 @@ void Worker::DoPostponedInlineSymbols() { if( m_data.newInlineSymbolsIndex >= 0 ) { -#ifdef NO_PARALLEL_SORT +#ifdef __EMSCRIPTEN__ pdqsort_branchless( m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); #else - std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); + ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); #endif const auto ms = std::lower_bound( m_data.symbolLocInline.begin(), m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline[m_data.newInlineSymbolsIndex] ); std::inplace_merge( ms, m_data.symbolLocInline.begin() + 
m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); @@ -7017,10 +7019,10 @@ void Worker::CreateMemAllocPlot( MemData& memdata ) void Worker::ReconstructMemAllocPlot( MemData& mem ) { -#ifdef NO_PARALLEL_SORT +#ifdef __EMSCRIPTEN__ pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } ); #else - std::sort( std::execution::par_unseq, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } ); + ppqsort::sort( ppqsort::execution::par, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } ); #endif const auto psz = mem.data.size() + mem.frees.size() + 1; @@ -7962,10 +7964,10 @@ void Worker::Write( FileWrite& f, bool fiDict ) } if( m_inconsistentSamples ) { -#ifdef NO_PARALLEL_SORT +#ifdef __EMSCRIPTEN__ pdqsort_branchless( thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } ); #else - std::sort( std::execution::par_unseq, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } ); + ppqsort::sort( ppqsort::execution::par, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } ); #endif } sz = thread->samples.size(); diff --git a/update/CMakeLists.txt b/update/CMakeLists.txt index 6c347e39..06a70845 100644 --- a/update/CMakeLists.txt +++ b/update/CMakeLists.txt @@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_STATISTICS "Disable calculation of statistics" ON) -option(NO_PARALLEL_STL "Disable parallel STL" OFF) 
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)