Replace parallel STL with PPQSort.

PPQSort is supposedly quite fast: https://github.com/GabTux/PPQSort

More importantly, it does not depend on TBB fuckery, so there's no longer
a need to link with an external library that people may or may not have.

The NO_PARALLEL_STL option is out, as it was provided solely to deal with
TBB being not available. Sequential sorting is still used on emscripten.
This commit is contained in:
Bartosz Taudul 2024-09-26 14:38:11 +02:00
parent d400183483
commit 1c1faeff2d
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
22 changed files with 42 additions and 71 deletions

View File

@ -12,7 +12,7 @@ jobs:
container: archlinux:base-devel container: archlinux:base-devel
steps: steps:
- name: Install dependencies - name: Install dependencies
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
- name: Trust git repo - name: Trust git repo
run: git config --global --add safe.directory '*' run: git config --global --add safe.directory '*'
- uses: actions/checkout@v4 - uses: actions/checkout@v4

View File

@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON) option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

View File

@ -29,11 +29,7 @@ list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/")
add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES}) add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES})
target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR}) target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR})
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd) target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd PPQSort::PPQSort)
if(NO_STATISTICS) if(NO_STATISTICS)
target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS) target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
endif() endif()
if(NOT NO_PARALLEL_STL AND UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
target_link_libraries(TracyServer PRIVATE TracyTbb)
endif()

View File

@ -214,32 +214,11 @@ if (NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
endif() endif()
endif() endif()
# TBB # PPQSort
if (NO_PARALLEL_STL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_PARALLEL_SORT")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_PARALLEL_SORT")
else()
if (UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
# Tracy does not use TBB directly, but the implementation of parallel algorithms
# in some versions of libstdc++ depends on TBB. When it does, you must
# explicitly link against -ltbb.
#
# Some distributions have pkg-config files for TBB, others don't.
pkg_check_modules(TBB tbb)
if (TBB_FOUND)
add_library(TracyTbb INTERFACE)
target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS})
target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES})
else()
CPMAddPackage( CPMAddPackage(
NAME tbb NAME PPQSort
GITHUB_REPOSITORY oneapi-src/oneTBB GITHUB_REPOSITORY GabTux/PPQSort
GIT_TAG v2021.12.0-rc2 VERSION 1.0.3
OPTIONS "TBB_TEST OFF" EXCLUDE_FROM_ALL TRUE
) )
add_library(TracyTbb INTERFACE)
target_link_libraries(TracyTbb INTERFACE tbb)
endif()
endif()
endif()

View File

@ -1,7 +1,6 @@
cmake_minimum_required(VERSION 3.16) cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
set(NO_STATISTICS OFF) set(NO_STATISTICS OFF)

View File

@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON) option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

View File

@ -837,8 +837,6 @@ There is no need to install external libraries (e.g. with vcpkg). All libraries
On Unix systems (including Linux), you will need to install the \texttt{pkg-config} utility to provide information about libraries. On Unix systems (including Linux), you will need to install the \texttt{pkg-config} utility to provide information about libraries.
Due to some questionable design decisions by the compiler developers, you will most likely also need the \texttt{tbb} library\footnote{Technically, this is not a Tracy dependency, but rather a \texttt{libstdc++} dependency, but it may still not be installed by default.}. If not found, this library is downloaded automatically.
Installation of the libraries on OSX can be facilitated using the \texttt{brew} package manager. Installation of the libraries on OSX can be facilitated using the \texttt{brew} package manager.
\paragraph{Linux} \paragraph{Linux}

View File

@ -6,7 +6,6 @@ option(LEGACY "Instead of Wayland, use the legacy X11 backend on Linux" OFF)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" OFF) option(NO_STATISTICS "Disable calculation of statistics" OFF)
option(SELF_PROFILE "Enable self-profiling" OFF) option(SELF_PROFILE "Enable self-profiling" OFF)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

View File

@ -13,11 +13,11 @@
#include "TracyImGui.hpp" #include "TracyImGui.hpp"
#include "TracyMicroArchitecture.hpp" #include "TracyMicroArchitecture.hpp"
#include "TracyPrint.hpp" #include "TracyPrint.hpp"
#include "TracySort.hpp"
#include "TracySourceView.hpp" #include "TracySourceView.hpp"
#include "TracyUtility.hpp" #include "TracyUtility.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "TracyWorker.hpp" #include "TracyWorker.hpp"
#include "tracy_pdqsort.h"
#include "IconsFontAwesome6.h" #include "IconsFontAwesome6.h"

View File

@ -5,6 +5,7 @@
#include "TracyUtility.hpp" #include "TracyUtility.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "TracyWorker.hpp" #include "TracyWorker.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -1,6 +1,7 @@
#include "TracyImGui.hpp" #include "TracyImGui.hpp"
#include "TracyPrint.hpp" #include "TracyPrint.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -6,6 +6,7 @@
#include "TracyTimelineContext.hpp" #include "TracyTimelineContext.hpp"
#include "TracyTimelineDraw.hpp" #include "TracyTimelineDraw.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -8,6 +8,7 @@
#include "TracyTimelineItem.hpp" #include "TracyTimelineItem.hpp"
#include "TracyTimelineContext.hpp" #include "TracyTimelineContext.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
constexpr float MinVisSize = 3; constexpr float MinVisSize = 3;

View File

@ -7,7 +7,9 @@
#include "TracyImGui.hpp" #include "TracyImGui.hpp"
#include "TracyMouse.hpp" #include "TracyMouse.hpp"
#include "TracyPrint.hpp" #include "TracyPrint.hpp"
#include "TracySort.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {
@ -507,10 +509,10 @@ void View::DrawFindZone()
} }
} }
auto mid = vec.begin() + vszorig; auto mid = vec.begin() + vszorig;
#ifdef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
pdqsort_branchless( mid, vec.end() ); pdqsort_branchless( mid, vec.end() );
#else #else
std::sort( std::execution::par_unseq, mid, vec.end() ); ppqsort::sort( ppqsort::execution::par, mid, vec.end() );
#endif #endif
std::inplace_merge( vec.begin(), mid, vec.end() ); std::inplace_merge( vec.begin(), mid, vec.end() );

View File

@ -1,6 +1,7 @@
#include "TracyImGui.hpp" #include "TracyImGui.hpp"
#include "TracyPrint.hpp" #include "TracyPrint.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -4,6 +4,7 @@
#include "TracyMouse.hpp" #include "TracyMouse.hpp"
#include "TracyPrint.hpp" #include "TracyPrint.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -9,6 +9,7 @@
#include "TracyTimelineContext.hpp" #include "TracyTimelineContext.hpp"
#include "TracyTimelineDraw.hpp" #include "TracyTimelineDraw.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -5,6 +5,7 @@
#include "TracyPrint.hpp" #include "TracyPrint.hpp"
#include "TracyMouse.hpp" #include "TracyMouse.hpp"
#include "TracyView.hpp" #include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {

View File

@ -1,19 +1,10 @@
#ifndef __TRACYSORT_HPP__ #ifndef __TRACYSORT_HPP__
#define __TRACYSORT_HPP__ #define __TRACYSORT_HPP__
#ifndef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
# if !defined __APPLE__ && !defined __EMSCRIPTEN__ && ( ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L )
# if __has_include(<execution>)
# include <algorithm>
# include <execution>
# else
# define NO_PARALLEL_SORT
# endif
# else
# define NO_PARALLEL_SORT
# endif
#endif
# include "tracy_pdqsort.h" # include "tracy_pdqsort.h"
#else
# include <ppqsort.h>
#endif
#endif #endif

View File

@ -102,10 +102,10 @@ public:
const auto se = sb + sortedEnd; const auto se = sb + sortedEnd;
const auto sl = se - 1; const auto sl = se - 1;
const auto ue = v.end(); const auto ue = v.end();
#ifdef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
pdqsort_branchless( se, ue, comp ); pdqsort_branchless( se, ue, comp );
#else #else
std::sort( std::execution::par_unseq, se, ue, comp ); ppqsort::sort( ppqsort::execution::par, se, ue, comp );
#endif #endif
const auto ss = std::lower_bound( sb, se, *se, comp ); const auto ss = std::lower_bound( sb, se, *se, comp );
const auto uu = std::lower_bound( se, ue, *sl, comp ); const auto uu = std::lower_bound( se, ue, *sl, comp );

View File

@ -37,6 +37,7 @@
#include "TracySort.hpp" #include "TracySort.hpp"
#include "TracyTaskDispatch.hpp" #include "TracyTaskDispatch.hpp"
#include "TracyWorker.hpp" #include "TracyWorker.hpp"
#include "tracy_pdqsort.h"
namespace tracy namespace tracy
{ {
@ -1552,12 +1553,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
m_data.symbolLoc[symIdx++] = SymbolLocation { symAddr, size.Val() }; m_data.symbolLoc[symIdx++] = SymbolLocation { symAddr, size.Val() };
} }
} }
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); pdqsort_branchless( m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
pdqsort_branchless( m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() ); pdqsort_branchless( m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
#else #else
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() ); ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
#endif #endif
f.Read( sz ); f.Read( sz );
@ -4155,10 +4157,10 @@ void Worker::DoPostponedSymbols()
{ {
if( m_data.newSymbolsIndex >= 0 ) if( m_data.newSymbolsIndex >= 0 )
{ {
#ifdef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
pdqsort_branchless( m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); pdqsort_branchless( m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
#else #else
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
#endif #endif
const auto ms = std::lower_bound( m_data.symbolLoc.begin(), m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc[m_data.newSymbolsIndex], [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); const auto ms = std::lower_bound( m_data.symbolLoc.begin(), m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc[m_data.newSymbolsIndex], [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
std::inplace_merge( ms, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } ); std::inplace_merge( ms, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
@ -4170,10 +4172,10 @@ void Worker::DoPostponedInlineSymbols()
{ {
if( m_data.newInlineSymbolsIndex >= 0 ) if( m_data.newInlineSymbolsIndex >= 0 )
{ {
#ifdef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
pdqsort_branchless( m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); pdqsort_branchless( m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
#else #else
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
#endif #endif
const auto ms = std::lower_bound( m_data.symbolLocInline.begin(), m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline[m_data.newInlineSymbolsIndex] ); const auto ms = std::lower_bound( m_data.symbolLocInline.begin(), m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline[m_data.newInlineSymbolsIndex] );
std::inplace_merge( ms, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() ); std::inplace_merge( ms, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
@ -7017,10 +7019,10 @@ void Worker::CreateMemAllocPlot( MemData& memdata )
void Worker::ReconstructMemAllocPlot( MemData& mem ) void Worker::ReconstructMemAllocPlot( MemData& mem )
{ {
#ifdef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } ); pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
#else #else
std::sort( std::execution::par_unseq, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } ); ppqsort::sort( ppqsort::execution::par, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
#endif #endif
const auto psz = mem.data.size() + mem.frees.size() + 1; const auto psz = mem.data.size() + mem.frees.size() + 1;
@ -7962,10 +7964,10 @@ void Worker::Write( FileWrite& f, bool fiDict )
} }
if( m_inconsistentSamples ) if( m_inconsistentSamples )
{ {
#ifdef NO_PARALLEL_SORT #ifdef __EMSCRIPTEN__
pdqsort_branchless( thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } ); pdqsort_branchless( thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
#else #else
std::sort( std::execution::par_unseq, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } ); ppqsort::sort( ppqsort::execution::par, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
#endif #endif
} }
sz = thread->samples.size(); sz = thread->samples.size();

View File

@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF) option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON) option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)