Use the fastest mutex available.

The selection is based on the following test results (a sketch of the kind of measurement loop behind these numbers follows the listings below):

MSVC:
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 11.641 ns/iter
     2 thread contention: 141.559 ns/iter
     3 thread contention: 242.733 ns/iter
     4 thread contention: 409.807 ns/iter
     5 thread contention: 561.544 ns/iter
     6 thread contention: 785.845 ns/iter
=> std::mutex
     No contention: 19.190 ns/iter
     2 thread contention: 39.305 ns/iter
     3 thread contention: 58.999 ns/iter
     4 thread contention: 59.532 ns/iter
     5 thread contention: 103.539 ns/iter
     6 thread contention: 110.314 ns/iter
=> std::shared_timed_mutex
     No contention: 45.487 ns/iter
     2 thread contention: 96.351 ns/iter
     3 thread contention: 142.871 ns/iter
     4 thread contention: 184.999 ns/iter
     5 thread contention: 336.608 ns/iter
     6 thread contention: 542.551 ns/iter
=> std::shared_mutex
     No contention: 10.861 ns/iter
     2 thread contention: 17.495 ns/iter
     3 thread contention: 31.126 ns/iter
     4 thread contention: 40.468 ns/iter
     5 thread contention: 15.677 ns/iter
     6 thread contention: 64.505 ns/iter

Cygwin (clang):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 11.536 ns/iter
     2 thread contention: 121.082 ns/iter
     3 thread contention: 396.430 ns/iter
     4 thread contention: 672.555 ns/iter
     5 thread contention: 1327.761 ns/iter
     6 thread contention: 14151.955 ns/iter
=> std::mutex
     No contention: 62.583 ns/iter
     2 thread contention: 3990.464 ns/iter
     3 thread contention: 7161.189 ns/iter
     4 thread contention: 9870.820 ns/iter
     5 thread contention: 12355.178 ns/iter
     6 thread contention: 14694.903 ns/iter
=> std::shared_timed_mutex
     No contention: 91.687 ns/iter
     2 thread contention: 1115.037 ns/iter
     3 thread contention: 4183.792 ns/iter
     4 thread contention: 15283.491 ns/iter
     5 thread contention: 27812.477 ns/iter
     6 thread contention: 35028.140 ns/iter
=> std::shared_mutex
     No contention: 91.764 ns/iter
     2 thread contention: 1051.826 ns/iter
     3 thread contention: 5574.720 ns/iter
     4 thread contention: 15721.416 ns/iter
     5 thread contention: 27721.487 ns/iter
     6 thread contention: 35420.404 ns/iter

Linux (x64):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 13.487 ns/iter
     2 thread contention: 210.317 ns/iter
     3 thread contention: 430.855 ns/iter
     4 thread contention: 510.533 ns/iter
     5 thread contention: 1003.609 ns/iter
     6 thread contention: 1787.683 ns/iter
=> std::mutex
     No contention: 12.403 ns/iter
     2 thread contention: 157.122 ns/iter
     3 thread contention: 186.791 ns/iter
     4 thread contention: 265.073 ns/iter
     5 thread contention: 283.778 ns/iter
     6 thread contention: 270.687 ns/iter
=> std::shared_timed_mutex
     No contention: 21.509 ns/iter
     2 thread contention: 150.179 ns/iter
     3 thread contention: 256.574 ns/iter
     4 thread contention: 415.351 ns/iter
     5 thread contention: 611.532 ns/iter
     6 thread contention: 944.695 ns/iter
=> std::shared_mutex
     No contention: 20.805 ns/iter
     2 thread contention: 157.034 ns/iter
     3 thread contention: 244.025 ns/iter
     4 thread contention: 406.269 ns/iter
     5 thread contention: 387.985 ns/iter
     6 thread contention: 468.550 ns/iter

Linux (arm64):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 20.891 ns/iter
     2 thread contention: 211.037 ns/iter
     3 thread contention: 409.962 ns/iter
     4 thread contention: 657.441 ns/iter
     5 thread contention: 828.405 ns/iter
     6 thread contention: 1131.827 ns/iter
=> std::mutex
     No contention: 50.884 ns/iter
     2 thread contention: 103.620 ns/iter
     3 thread contention: 332.429 ns/iter
     4 thread contention: 620.802 ns/iter
     5 thread contention: 783.943 ns/iter
     6 thread contention: 834.002 ns/iter
=> std::shared_timed_mutex
     No contention: 64.948 ns/iter
     2 thread contention: 173.191 ns/iter
     3 thread contention: 490.352 ns/iter
     4 thread contention: 660.668 ns/iter
     5 thread contention: 1014.546 ns/iter
     6 thread contention: 1451.553 ns/iter
=> std::shared_mutex
     No contention: 64.521 ns/iter
     2 thread contention: 195.222 ns/iter
     3 thread contention: 490.819 ns/iter
     4 thread contention: 654.786 ns/iter
     5 thread contention: 955.759 ns/iter
     6 thread contention: 1282.544 ns/iter
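
The harness that produced these figures is not part of this commit, so the following is only a minimal sketch of how per-iteration lock costs like the ones above can be measured; the template name, iteration count, and output format are assumptions:

#include <chrono>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

// Each worker repeatedly locks and unlocks the same mutex around an empty
// critical section, so the time reported per iteration is dominated by the
// cost of the lock/unlock pair under the given level of contention.
template<class Mutex>
double MeasureNsPerIter( int threads, long iters = 1000000 )
{
    Mutex mtx;
    std::vector<std::thread> pool;
    const auto t0 = std::chrono::steady_clock::now();
    for( int t=0; t<threads; t++ )
    {
        pool.emplace_back( [&mtx, iters] {
            for( long i=0; i<iters; i++ )
            {
                std::lock_guard<Mutex> lock( mtx );
            }
        } );
    }
    for( auto& th : pool ) th.join();
    const auto t1 = std::chrono::steady_clock::now();
    const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count();
    return double( ns ) / ( double( iters ) * threads );
}

int main()
{
    std::printf( "No contention: %.3f ns/iter\n", MeasureNsPerIter<std::mutex>( 1 ) );
    for( int t=2; t<=6; t++ )
    {
        std::printf( "%d thread contention: %.3f ns/iter\n", t, MeasureNsPerIter<std::mutex>( t ) );
    }
}

Swapping the template argument (std::shared_mutex, std::shared_timed_mutex, NonRecursiveBenaphore) covers the other rows.
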
Bartosz Taudul 2018-07-14 00:39:01 +02:00
parent a26ab263dd
commit 561d2dc360
9 changed files with 56 additions and 19 deletions


@@ -472,7 +472,7 @@ void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
}
-std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock );
+std::lock_guard<TracyMutex> lock( m_serialLock );
for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
m_serialDequeue.clear();
@@ -536,7 +536,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
Profiler::DequeueStatus Profiler::DequeueSerial()
{
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock );
+std::lock_guard<TracyMutex> lock( m_serialLock );
m_serialQueue.swap( m_serialDequeue );
}


@@ -10,10 +10,10 @@
#include "TracyCallstack.hpp"
#include "TracyFastVector.hpp"
#include "../common/tracy_lz4.hpp"
#include "../common/tracy_benaphore.h"
#include "../common/TracyQueue.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "../common/TracyMutex.hpp"
#include "../common/TracySystem.hpp"
#if defined _MSC_VER || defined __CYGWIN__
@@ -412,13 +412,13 @@ private:
char* m_lz4Buf;
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
-NonRecursiveBenaphore m_serialLock;
+TracyMutex m_serialLock;
#ifdef TRACY_ON_DEMAND
std::atomic<bool> m_isConnected;
std::atomic<uint64_t> m_frameCount;
-NonRecursiveBenaphore m_deferredLock;
+TracyMutex m_deferredLock;
FastVector<QueueItem> m_deferredQueue;
#endif
};

common/TracyMutex.hpp (new file, +33 lines)

@@ -0,0 +1,33 @@
+#ifndef __TRACYMUTEX_HPP__
+#define __TRACYMUTEX_HPP__
+#if defined _MSC_VER
+# include <shared_mutex>
+namespace tracy
+{
+using TracyMutex = std::shared_mutex;
+}
+#elif defined __CYGWIN__
+#include "tracy_benaphore.h"
+namespace tracy
+{
+using TracyMutex = NonRecursiveBenaphore;
+}
+#else
+#include <mutex>
+namespace tracy
+{
+using TracyMutex = std::mutex;
+}
+#endif
+#endif
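
The platform split in this header follows the tables above: on MSVC std::shared_mutex posts the lowest times even for plain exclusive locking, on Cygwin the standard mutexes degrade badly under contention so the in-house benaphore is kept, and elsewhere std::mutex comes out ahead. Whichever branch is taken, the alias stays drop-in compatible with the standard RAII guards seen in the hunks below; a hypothetical call site for illustration (the include path, names, and counter are not from the commit):

#include <mutex>
#include "../common/TracyMutex.hpp"  // relative path as used from the client/server sources in this commit

namespace
{
tracy::TracyMutex s_exampleLock;  // hypothetical shared-state guard, not part of the commit
int s_exampleCounter = 0;
}

void BumpCounter()
{
    // std::lock_guard only requires lock()/unlock(), which all three alias targets provide.
    std::lock_guard<tracy::TracyMutex> lock( s_exampleLock );
    s_exampleCounter++;
}
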


@@ -9,6 +9,7 @@
#include <stdlib.h>
#include <time.h>
#include "../common/TracyMutex.hpp"
#include "../common/TracySystem.hpp"
#include "tracy_pdqsort.h"
#include "TracyBadVersion.hpp"
@@ -364,7 +365,7 @@ bool View::DrawImpl()
keepOpenPtr = &keepOpen;
}
-std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() );
+std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() );
char tmp[2048];
sprintf( tmp, "%s###Profiler", m_worker.GetCaptureName().c_str() );
ImGui::SetNextWindowSize( ImVec2( 1550, 800 ), ImGuiCond_FirstUseEver );
@@ -437,7 +438,7 @@ void View::DrawConnection()
const auto cs = ty * 0.9f;
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetMbpsDataLock() );
+std::lock_guard<TracyMutex> lock( m_worker.GetMbpsDataLock() );
ImGui::Begin( m_worker.GetAddr().c_str(), nullptr, ImGuiWindowFlags_AlwaysAutoResize );
const auto& mbpsVector = m_worker.GetMbpsData();
const auto mbps = mbpsVector.back();
@@ -461,7 +462,7 @@ void View::DrawConnection()
const auto wpos = ImGui::GetWindowPos() + ImGui::GetWindowContentRegionMin();
ImGui::GetWindowDrawList()->AddCircleFilled( wpos + ImVec2( 1 + cs * 0.5, 3 + ty * 0.5 ), cs * 0.5, m_worker.IsConnected() ? 0xFF2222CC : 0xFF444444, 10 );
-std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() );
+std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() );
{
const auto sz = m_worker.GetFrameCount();
if( sz > 1 )


@@ -8,7 +8,6 @@
#include <thread>
#include <vector>
#include "../common/tracy_benaphore.h"
#include "TracyVector.hpp"
#include "TracyWorker.hpp"
#include "tracy_flat_hash_map.hpp"


@@ -501,7 +501,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } );
#endif
}
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
m_data.sourceLocationZonesReady = true;
} );
#endif
@@ -1031,7 +1031,7 @@ void Worker::Exec()
const char* end = buf + sz;
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
while( ptr < end )
{
auto ev = (const QueueItem*)ptr;
@@ -1049,7 +1049,7 @@ void Worker::Exec()
enum { MbpsUpdateTime = 200 };
if( td > MbpsUpdateTime )
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_mbpsData.lock );
+std::lock_guard<TracyMutex> lock( m_mbpsData.lock );
m_mbpsData.mbps.erase( m_mbpsData.mbps.begin() );
m_mbpsData.mbps.emplace_back( bytes / ( td * 125.f ) );
m_mbpsData.compRatio = float( bytes ) / decBytes;
@@ -2331,7 +2331,7 @@ void Worker::ReconstructMemAllocPlot()
PlotData* plot;
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
plot = m_slab.AllocInit<PlotData>();
}
@@ -2413,7 +2413,7 @@ void Worker::ReconstructMemAllocPlot()
plot->min = 0;
plot->max = max;
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
m_data.plots.insert( m_data.plots.begin(), plot );
m_data.memory.plot = plot;
}


@@ -9,9 +9,9 @@
#include <thread>
#include <vector>
#include "../common/tracy_benaphore.h"
#include "../common/tracy_lz4.hpp"
#include "../common/TracyForceInline.hpp"
#include "../common/TracyMutex.hpp"
#include "../common/TracyQueue.hpp"
#include "../common/TracySocket.hpp"
#include "tracy_flat_hash_map.hpp"
@@ -75,7 +75,7 @@ class Worker
{
DataBlock() : zonesCnt( 0 ), lastTime( 0 ), frameOffset( 0 ), threadLast( std::numeric_limits<uint64_t>::max(), 0 ) {}
-NonRecursiveBenaphore lock;
+TracyMutex lock;
Vector<int64_t> frames;
Vector<GpuCtxData*> gpuData;
Vector<MessageData*> messages;
@@ -115,7 +115,7 @@ class Worker
{
MbpsBlock() : mbps( 64 ), compRatio( 1.0 ) {}
-NonRecursiveBenaphore lock;
+TracyMutex lock;
std::vector<float> mbps;
float compRatio;
};
@@ -146,7 +146,7 @@ public:
int64_t GetDelay() const { return m_delay; }
int64_t GetResolution() const { return m_resolution; }
-NonRecursiveBenaphore& GetDataLock() { return m_data.lock; }
+TracyMutex& GetDataLock() { return m_data.lock; }
size_t GetFrameCount() const { return m_data.frames.size(); }
int64_t GetLastTime() const { return m_data.lastTime; }
uint64_t GetZoneCount() const { return m_data.zonesCnt; }
@@ -202,7 +202,7 @@ public:
}
tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { assert( thread < m_data.threadExpand.size() ); return m_data.threadExpand[thread]; }
-NonRecursiveBenaphore& GetMbpsDataLock() { return m_mbpsData.lock; }
+TracyMutex& GetMbpsDataLock() { return m_mbpsData.lock; }
const std::vector<float>& GetMbpsData() const { return m_mbpsData.mbps; }
float GetCompRatio() const { return m_mbpsData.compRatio; }


@@ -112,6 +112,7 @@
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
<ClInclude Include="..\..\..\common\TracyMutex.hpp" />
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
<ClInclude Include="..\..\..\common\TracySocket.hpp" />


@@ -179,6 +179,9 @@
<ClInclude Include="..\..\..\server\TracyVarArray.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyMutex.hpp">
<Filter>common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Natvis Include="DebugVis.natvis" />