mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-24 15:14:34 +00:00
Replace parallel STL with PPQSort.
PPQSort (https://github.com/GabTux/PPQSort) is supposedly quite fast. More importantly, it does not depend on TBB, so there's no longer a need to link with an external library that people may or may not have. The NO_PARALLEL_STL option is removed, as it was provided solely to deal with TBB not being available. Sequential sorting is still used on emscripten.
This commit is contained in:
parent
d400183483
commit
1c1faeff2d
2
.github/workflows/linux.yml
vendored
2
.github/workflows/linux.yml
vendored
@ -12,7 +12,7 @@ jobs:
|
||||
container: archlinux:base-devel
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
|
||||
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
|
||||
- name: Trust git repo
|
||||
run: git config --global --add safe.directory '*'
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_STATISTICS "Disable calculation of statistics" ON)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
|
@ -29,11 +29,7 @@ list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/")
|
||||
|
||||
add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES})
|
||||
target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR})
|
||||
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd)
|
||||
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd PPQSort::PPQSort)
|
||||
if(NO_STATISTICS)
|
||||
target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
|
||||
endif()
|
||||
|
||||
if(NOT NO_PARALLEL_STL AND UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
|
||||
target_link_libraries(TracyServer PRIVATE TracyTbb)
|
||||
endif()
|
||||
|
@ -214,32 +214,11 @@ if (NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# TBB
|
||||
if (NO_PARALLEL_STL)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_PARALLEL_SORT")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_PARALLEL_SORT")
|
||||
else()
|
||||
if (UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
|
||||
# Tracy does not use TBB directly, but the implementation of parallel algorithms
|
||||
# in some versions of libstdc++ depends on TBB. When it does, you must
|
||||
# explicitly link against -ltbb.
|
||||
#
|
||||
# Some distributions have pkg-config files for TBB, others don't.
|
||||
# PPQSort
|
||||
|
||||
pkg_check_modules(TBB tbb)
|
||||
if (TBB_FOUND)
|
||||
add_library(TracyTbb INTERFACE)
|
||||
target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES})
|
||||
else()
|
||||
CPMAddPackage(
|
||||
NAME tbb
|
||||
GITHUB_REPOSITORY oneapi-src/oneTBB
|
||||
GIT_TAG v2021.12.0-rc2
|
||||
OPTIONS "TBB_TEST OFF"
|
||||
)
|
||||
add_library(TracyTbb INTERFACE)
|
||||
target_link_libraries(TracyTbb INTERFACE tbb)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
CPMAddPackage(
|
||||
NAME PPQSort
|
||||
GITHUB_REPOSITORY GabTux/PPQSort
|
||||
VERSION 1.0.3
|
||||
EXCLUDE_FROM_ALL TRUE
|
||||
)
|
||||
|
@ -1,7 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
set(NO_STATISTICS OFF)
|
||||
|
||||
|
@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_STATISTICS "Disable calculation of statistics" ON)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
|
@ -837,8 +837,6 @@ There is no need to install external libraries (e.g. with vcpkg). All libraries
|
||||
|
||||
On Unix systems (including Linux), you will need to install the \texttt{pkg-config} utility to provide information about libraries.
|
||||
|
||||
Due to some questionable design decisions by the compiler developers, you will most likely also need the \texttt{tbb} library\footnote{Technically, this is not a Tracy dependency, but rather a \texttt{libstdc++} dependency, but it may still not be installed by default.}. If not found, this library is downloaded automatically.
|
||||
|
||||
Installation of the libraries on OSX can be facilitated using the \texttt{brew} package manager.
|
||||
|
||||
\paragraph{Linux}
|
||||
|
@ -6,7 +6,6 @@ option(LEGACY "Instead of Wayland, use the legacy X11 backend on Linux" OFF)
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_STATISTICS "Disable calculation of statistics" OFF)
|
||||
option(SELF_PROFILE "Enable self-profiling" OFF)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
|
@ -13,11 +13,11 @@
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracyMicroArchitecture.hpp"
|
||||
#include "TracyPrint.hpp"
|
||||
#include "TracySort.hpp"
|
||||
#include "TracySourceView.hpp"
|
||||
#include "TracyUtility.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "TracyWorker.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
#include "IconsFontAwesome6.h"
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "TracyUtility.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "TracyWorker.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracyPrint.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "TracyTimelineContext.hpp"
|
||||
#include "TracyTimelineDraw.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "TracyTimelineItem.hpp"
|
||||
#include "TracyTimelineContext.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
constexpr float MinVisSize = 3;
|
||||
|
||||
|
@ -7,7 +7,9 @@
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracyMouse.hpp"
|
||||
#include "TracyPrint.hpp"
|
||||
#include "TracySort.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
@ -507,10 +509,10 @@ void View::DrawFindZone()
|
||||
}
|
||||
}
|
||||
auto mid = vec.begin() + vszorig;
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( mid, vec.end() );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, mid, vec.end() );
|
||||
ppqsort::sort( ppqsort::execution::par, mid, vec.end() );
|
||||
#endif
|
||||
std::inplace_merge( vec.begin(), mid, vec.end() );
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracyPrint.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "TracyMouse.hpp"
|
||||
#include "TracyPrint.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "TracyTimelineContext.hpp"
|
||||
#include "TracyTimelineDraw.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "TracyPrint.hpp"
|
||||
#include "TracyMouse.hpp"
|
||||
#include "TracyView.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
@ -1,19 +1,10 @@
|
||||
#ifndef __TRACYSORT_HPP__
|
||||
#define __TRACYSORT_HPP__
|
||||
|
||||
#ifndef NO_PARALLEL_SORT
|
||||
# if !defined __APPLE__ && !defined __EMSCRIPTEN__ && ( ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L )
|
||||
# if __has_include(<execution>)
|
||||
# include <algorithm>
|
||||
# include <execution>
|
||||
# else
|
||||
# define NO_PARALLEL_SORT
|
||||
# endif
|
||||
# else
|
||||
# define NO_PARALLEL_SORT
|
||||
# endif
|
||||
#ifdef __EMSCRIPTEN__
|
||||
# include "tracy_pdqsort.h"
|
||||
#else
|
||||
# include <ppqsort.h>
|
||||
#endif
|
||||
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
#endif
|
||||
|
@ -102,10 +102,10 @@ public:
|
||||
const auto se = sb + sortedEnd;
|
||||
const auto sl = se - 1;
|
||||
const auto ue = v.end();
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
pdqsort_branchless( se, ue, comp );
|
||||
// Sort the tail [se, ue), i.e. the elements past sortedEnd: the lower_bound calls below search both [sb, se) and [se, ue), so the tail has to be ordered by comp as well.
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( se, ue, comp );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, se, ue, comp );
|
||||
ppqsort::sort( ppqsort::execution::par, se, ue, comp );
|
||||
#endif
|
||||
const auto ss = std::lower_bound( sb, se, *se, comp );
|
||||
const auto uu = std::lower_bound( se, ue, *sl, comp );
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "TracySort.hpp"
|
||||
#include "TracyTaskDispatch.hpp"
|
||||
#include "TracyWorker.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
@ -1552,12 +1553,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
m_data.symbolLoc[symIdx++] = SymbolLocation { symAddr, size.Val() };
|
||||
}
|
||||
}
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
pdqsort_branchless( m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
|
||||
#endif
|
||||
|
||||
f.Read( sz );
|
||||
@ -4155,10 +4157,10 @@ void Worker::DoPostponedSymbols()
|
||||
{
|
||||
if( m_data.newSymbolsIndex >= 0 )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
#endif
|
||||
const auto ms = std::lower_bound( m_data.symbolLoc.begin(), m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc[m_data.newSymbolsIndex], [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
std::inplace_merge( ms, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
@ -4170,10 +4172,10 @@ void Worker::DoPostponedInlineSymbols()
|
||||
{
|
||||
if( m_data.newInlineSymbolsIndex >= 0 )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
#endif
|
||||
const auto ms = std::lower_bound( m_data.symbolLocInline.begin(), m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline[m_data.newInlineSymbolsIndex] );
|
||||
std::inplace_merge( ms, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
@ -7017,10 +7019,10 @@ void Worker::CreateMemAllocPlot( MemData& memdata )
|
||||
|
||||
void Worker::ReconstructMemAllocPlot( MemData& mem )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
|
||||
ppqsort::sort( ppqsort::execution::par, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
|
||||
#endif
|
||||
|
||||
const auto psz = mem.data.size() + mem.frees.size() + 1;
|
||||
@ -7962,10 +7964,10 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
}
|
||||
if( m_inconsistentSamples )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
|
||||
ppqsort::sort( ppqsort::execution::par, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
|
||||
#endif
|
||||
}
|
||||
sz = thread->samples.size();
|
||||
|
@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_STATISTICS "Disable calculation of statistics" ON)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user