Replace parallel STL with PPQSort.

PPQSort is supposedly quite fast: https://github.com/GabTux/PPQSort

More importantly, it does not depend on TBB, so there's no longer
a need to link with an external library that people may or may not have.

The NO_PARALLEL_STL option has been removed, as it was provided solely to deal
with TBB not being available. Sequential sorting is still used on emscripten.
This commit is contained in:
Bartosz Taudul 2024-09-26 14:38:11 +02:00
parent d400183483
commit 1c1faeff2d
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
22 changed files with 42 additions and 71 deletions

View File

@ -12,7 +12,7 @@ jobs:
container: archlinux:base-devel
steps:
- name: Install dependencies
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
- name: Trust git repo
run: git config --global --add safe.directory '*'
- uses: actions/checkout@v4

View File

@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

View File

@ -29,11 +29,7 @@ list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/")
add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES})
target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR})
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd)
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd PPQSort::PPQSort)
if(NO_STATISTICS)
target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
endif()
if(NOT NO_PARALLEL_STL AND UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
target_link_libraries(TracyServer PRIVATE TracyTbb)
endif()

View File

@ -214,32 +214,11 @@ if (NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
endif()
endif()
# TBB
if (NO_PARALLEL_STL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_PARALLEL_SORT")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_PARALLEL_SORT")
else()
if (UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
# Tracy does not use TBB directly, but the implementation of parallel algorithms
# in some versions of libstdc++ depends on TBB. When it does, you must
# explicitly link against -ltbb.
#
# Some distributions have pkg-config files for TBB, others don't.
# PPQSort
pkg_check_modules(TBB tbb)
if (TBB_FOUND)
add_library(TracyTbb INTERFACE)
target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS})
target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES})
else()
CPMAddPackage(
NAME tbb
GITHUB_REPOSITORY oneapi-src/oneTBB
GIT_TAG v2021.12.0-rc2
OPTIONS "TBB_TEST OFF"
)
add_library(TracyTbb INTERFACE)
target_link_libraries(TracyTbb INTERFACE tbb)
endif()
endif()
endif()
CPMAddPackage(
NAME PPQSort
GITHUB_REPOSITORY GabTux/PPQSort
VERSION 1.0.3
EXCLUDE_FROM_ALL TRUE
)

View File

@ -1,7 +1,6 @@
cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
set(NO_STATISTICS OFF)

View File

@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

View File

@ -837,8 +837,6 @@ There is no need to install external libraries (e.g. with vcpkg). All libraries
On Unix systems (including Linux), you will need to install the \texttt{pkg-config} utility to provide information about libraries.
Due to some questionable design decisions by the compiler developers, you will most likely also need the \texttt{tbb} library\footnote{Technically, this is not a Tracy dependency, but rather a \texttt{libstdc++} dependency, but it may still not be installed by default.}. If not found, this library is downloaded automatically.
Installation of the libraries on OSX can be facilitated using the \texttt{brew} package manager.
\paragraph{Linux}

View File

@ -6,7 +6,6 @@ option(LEGACY "Instead of Wayland, use the legacy X11 backend on Linux" OFF)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" OFF)
option(SELF_PROFILE "Enable self-profiling" OFF)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

View File

@ -13,11 +13,11 @@
#include "TracyImGui.hpp"
#include "TracyMicroArchitecture.hpp"
#include "TracyPrint.hpp"
#include "TracySort.hpp"
#include "TracySourceView.hpp"
#include "TracyUtility.hpp"
#include "TracyView.hpp"
#include "TracyWorker.hpp"
#include "tracy_pdqsort.h"
#include "IconsFontAwesome6.h"

View File

@ -5,6 +5,7 @@
#include "TracyUtility.hpp"
#include "TracyView.hpp"
#include "TracyWorker.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -1,6 +1,7 @@
#include "TracyImGui.hpp"
#include "TracyPrint.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -6,6 +6,7 @@
#include "TracyTimelineContext.hpp"
#include "TracyTimelineDraw.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -8,6 +8,7 @@
#include "TracyTimelineItem.hpp"
#include "TracyTimelineContext.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
constexpr float MinVisSize = 3;

View File

@ -7,7 +7,9 @@
#include "TracyImGui.hpp"
#include "TracyMouse.hpp"
#include "TracyPrint.hpp"
#include "TracySort.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{
@ -507,10 +509,10 @@ void View::DrawFindZone()
}
}
auto mid = vec.begin() + vszorig;
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( mid, vec.end() );
#else
std::sort( std::execution::par_unseq, mid, vec.end() );
ppqsort::sort( ppqsort::execution::par, mid, vec.end() );
#endif
std::inplace_merge( vec.begin(), mid, vec.end() );

View File

@ -1,6 +1,7 @@
#include "TracyImGui.hpp"
#include "TracyPrint.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -4,6 +4,7 @@
#include "TracyMouse.hpp"
#include "TracyPrint.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -9,6 +9,7 @@
#include "TracyTimelineContext.hpp"
#include "TracyTimelineDraw.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -5,6 +5,7 @@
#include "TracyPrint.hpp"
#include "TracyMouse.hpp"
#include "TracyView.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{

View File

@ -1,19 +1,10 @@
#ifndef __TRACYSORT_HPP__
#define __TRACYSORT_HPP__
#ifndef NO_PARALLEL_SORT
# if !defined __APPLE__ && !defined __EMSCRIPTEN__ && ( ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L )
# if __has_include(<execution>)
# include <algorithm>
# include <execution>
# else
# define NO_PARALLEL_SORT
# endif
# else
# define NO_PARALLEL_SORT
# endif
#ifdef __EMSCRIPTEN__
# include "tracy_pdqsort.h"
#else
# include <ppqsort.h>
#endif
#include "tracy_pdqsort.h"
#endif

View File

@ -102,10 +102,10 @@ public:
const auto se = sb + sortedEnd;
const auto sl = se - 1;
const auto ue = v.end();
#ifdef NO_PARALLEL_SORT
pdqsort_branchless( se, ue, comp );
// Sort the tail [se, ue): the lower_bound searches that follow need it ordered by comp.
#ifdef __EMSCRIPTEN__
pdqsort_branchless( se, ue, comp );
#else
std::sort( std::execution::par_unseq, se, ue, comp );
ppqsort::sort( ppqsort::execution::par, se, ue, comp );
#endif
const auto ss = std::lower_bound( sb, se, *se, comp );
const auto uu = std::lower_bound( se, ue, *sl, comp );

View File

@ -37,6 +37,7 @@
#include "TracySort.hpp"
#include "TracyTaskDispatch.hpp"
#include "TracyWorker.hpp"
#include "tracy_pdqsort.h"
namespace tracy
{
@ -1552,12 +1553,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
m_data.symbolLoc[symIdx++] = SymbolLocation { symAddr, size.Val() };
}
}
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
pdqsort_branchless( m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
#else
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
#endif
f.Read( sz );
@ -4155,10 +4157,10 @@ void Worker::DoPostponedSymbols()
{
if( m_data.newSymbolsIndex >= 0 )
{
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
#else
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
#endif
const auto ms = std::lower_bound( m_data.symbolLoc.begin(), m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc[m_data.newSymbolsIndex], [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
std::inplace_merge( ms, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
@ -4170,10 +4172,10 @@ void Worker::DoPostponedInlineSymbols()
{
if( m_data.newInlineSymbolsIndex >= 0 )
{
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
#else
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
#endif
const auto ms = std::lower_bound( m_data.symbolLocInline.begin(), m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline[m_data.newInlineSymbolsIndex] );
std::inplace_merge( ms, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
@ -7017,10 +7019,10 @@ void Worker::CreateMemAllocPlot( MemData& memdata )
void Worker::ReconstructMemAllocPlot( MemData& mem )
{
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
#else
std::sort( std::execution::par_unseq, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
ppqsort::sort( ppqsort::execution::par, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
#endif
const auto psz = mem.data.size() + mem.frees.size() + 1;
@ -7962,10 +7964,10 @@ void Worker::Write( FileWrite& f, bool fiDict )
}
if( m_inconsistentSamples )
{
#ifdef NO_PARALLEL_SORT
#ifdef __EMSCRIPTEN__
pdqsort_branchless( thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
#else
std::sort( std::execution::par_unseq, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
ppqsort::sort( ppqsort::execution::par, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
#endif
}
sz = thread->samples.size();

View File

@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)