Use the fastest mutex available.

The selection is based on the following test results (a sketch of the kind of measurement loop behind these numbers follows the listings below):

MSVC:
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 11.641 ns/iter
     2 thread contention: 141.559 ns/iter
     3 thread contention: 242.733 ns/iter
     4 thread contention: 409.807 ns/iter
     5 thread contention: 561.544 ns/iter
     6 thread contention: 785.845 ns/iter
=> std::mutex
     No contention: 19.190 ns/iter
     2 thread contention: 39.305 ns/iter
     3 thread contention: 58.999 ns/iter
     4 thread contention: 59.532 ns/iter
     5 thread contention: 103.539 ns/iter
     6 thread contention: 110.314 ns/iter
=> std::shared_timed_mutex
     No contention: 45.487 ns/iter
     2 thread contention: 96.351 ns/iter
     3 thread contention: 142.871 ns/iter
     4 thread contention: 184.999 ns/iter
     5 thread contention: 336.608 ns/iter
     6 thread contention: 542.551 ns/iter
=> std::shared_mutex
     No contention: 10.861 ns/iter
     2 thread contention: 17.495 ns/iter
     3 thread contention: 31.126 ns/iter
     4 thread contention: 40.468 ns/iter
     5 thread contention: 15.677 ns/iter
     6 thread contention: 64.505 ns/iter

Cygwin (clang):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 11.536 ns/iter
     2 thread contention: 121.082 ns/iter
     3 thread contention: 396.430 ns/iter
     4 thread contention: 672.555 ns/iter
     5 thread contention: 1327.761 ns/iter
     6 thread contention: 14151.955 ns/iter
=> std::mutex
     No contention: 62.583 ns/iter
     2 thread contention: 3990.464 ns/iter
     3 thread contention: 7161.189 ns/iter
     4 thread contention: 9870.820 ns/iter
     5 thread contention: 12355.178 ns/iter
     6 thread contention: 14694.903 ns/iter
=> std::shared_timed_mutex
     No contention: 91.687 ns/iter
     2 thread contention: 1115.037 ns/iter
     3 thread contention: 4183.792 ns/iter
     4 thread contention: 15283.491 ns/iter
     5 thread contention: 27812.477 ns/iter
     6 thread contention: 35028.140 ns/iter
=> std::shared_mutex
     No contention: 91.764 ns/iter
     2 thread contention: 1051.826 ns/iter
     3 thread contention: 5574.720 ns/iter
     4 thread contention: 15721.416 ns/iter
     5 thread contention: 27721.487 ns/iter
     6 thread contention: 35420.404 ns/iter

Linux (x64):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 13.487 ns/iter
     2 thread contention: 210.317 ns/iter
     3 thread contention: 430.855 ns/iter
     4 thread contention: 510.533 ns/iter
     5 thread contention: 1003.609 ns/iter
     6 thread contention: 1787.683 ns/iter
=> std::mutex
     No contention: 12.403 ns/iter
     2 thread contention: 157.122 ns/iter
     3 thread contention: 186.791 ns/iter
     4 thread contention: 265.073 ns/iter
     5 thread contention: 283.778 ns/iter
     6 thread contention: 270.687 ns/iter
=> std::shared_timed_mutex
     No contention: 21.509 ns/iter
     2 thread contention: 150.179 ns/iter
     3 thread contention: 256.574 ns/iter
     4 thread contention: 415.351 ns/iter
     5 thread contention: 611.532 ns/iter
     6 thread contention: 944.695 ns/iter
=> std::shared_mutex
     No contention: 20.805 ns/iter
     2 thread contention: 157.034 ns/iter
     3 thread contention: 244.025 ns/iter
     4 thread contention: 406.269 ns/iter
     5 thread contention: 387.985 ns/iter
     6 thread contention: 468.550 ns/iter

Linux (arm64):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
     No contention: 20.891 ns/iter
     2 thread contention: 211.037 ns/iter
     3 thread contention: 409.962 ns/iter
     4 thread contention: 657.441 ns/iter
     5 thread contention: 828.405 ns/iter
     6 thread contention: 1131.827 ns/iter
=> std::mutex
     No contention: 50.884 ns/iter
     2 thread contention: 103.620 ns/iter
     3 thread contention: 332.429 ns/iter
     4 thread contention: 620.802 ns/iter
     5 thread contention: 783.943 ns/iter
     6 thread contention: 834.002 ns/iter
=> std::shared_timed_mutex
     No contention: 64.948 ns/iter
     2 thread contention: 173.191 ns/iter
     3 thread contention: 490.352 ns/iter
     4 thread contention: 660.668 ns/iter
     5 thread contention: 1014.546 ns/iter
     6 thread contention: 1451.553 ns/iter
=> std::shared_mutex
     No contention: 64.521 ns/iter
     2 thread contention: 195.222 ns/iter
     3 thread contention: 490.819 ns/iter
     4 thread contention: 654.786 ns/iter
     5 thread contention: 955.759 ns/iter
     6 thread contention: 1282.544 ns/iter
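
The harness that produced these figures is not part of this commit, so the following is only a minimal sketch of how per-iteration lock costs like the ones above can be measured; the template name, iteration count, and output format are assumptions:

#include <chrono>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

// Each worker repeatedly locks and unlocks the same mutex around an empty
// critical section, so the time reported per iteration is dominated by the
// cost of the lock/unlock pair under the given level of contention.
template<class Mutex>
double MeasureNsPerIter( int threads, long iters = 1000000 )
{
    Mutex mtx;
    std::vector<std::thread> pool;
    const auto t0 = std::chrono::steady_clock::now();
    for( int t=0; t<threads; t++ )
    {
        pool.emplace_back( [&mtx, iters] {
            for( long i=0; i<iters; i++ )
            {
                std::lock_guard<Mutex> lock( mtx );
            }
        } );
    }
    for( auto& th : pool ) th.join();
    const auto t1 = std::chrono::steady_clock::now();
    const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count();
    return double( ns ) / ( double( iters ) * threads );
}

int main()
{
    std::printf( "No contention: %.3f ns/iter\n", MeasureNsPerIter<std::mutex>( 1 ) );
    for( int t=2; t<=6; t++ )
    {
        std::printf( "%d thread contention: %.3f ns/iter\n", t, MeasureNsPerIter<std::mutex>( t ) );
    }
}

Swapping the template argument (std::shared_mutex, std::shared_timed_mutex, NonRecursiveBenaphore) covers the other rows.
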
Bartosz Taudul 2018-07-14 00:39:01 +02:00
parent a26ab263dd
commit 561d2dc360
9 changed files with 56 additions and 19 deletions


@@ -472,7 +472,7 @@ void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
}
-std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock );
+std::lock_guard<TracyMutex> lock( m_serialLock );
for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
m_serialDequeue.clear();
@@ -536,7 +536,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
Profiler::DequeueStatus Profiler::DequeueSerial()
{
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock );
+std::lock_guard<TracyMutex> lock( m_serialLock );
m_serialQueue.swap( m_serialDequeue );
}


@@ -10,10 +10,10 @@
#include "TracyCallstack.hpp"
#include "TracyFastVector.hpp"
#include "../common/tracy_lz4.hpp"
#include "../common/tracy_benaphore.h"
#include "../common/TracyQueue.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "../common/TracyMutex.hpp"
#include "../common/TracySystem.hpp"
#if defined _MSC_VER || defined __CYGWIN__
@@ -412,13 +412,13 @@ private:
char* m_lz4Buf;
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
-NonRecursiveBenaphore m_serialLock;
+TracyMutex m_serialLock;
#ifdef TRACY_ON_DEMAND
std::atomic<bool> m_isConnected;
std::atomic<uint64_t> m_frameCount;
-NonRecursiveBenaphore m_deferredLock;
+TracyMutex m_deferredLock;
FastVector<QueueItem> m_deferredQueue;
#endif
};

common/TracyMutex.hpp (new file, +33 lines)

@@ -0,0 +1,33 @@
+#ifndef __TRACYMUTEX_HPP__
+#define __TRACYMUTEX_HPP__
+#if defined _MSC_VER
+# include <shared_mutex>
+namespace tracy
+{
+using TracyMutex = std::shared_mutex;
+}
+#elif defined __CYGWIN__
+#include "tracy_benaphore.h"
+namespace tracy
+{
+using TracyMutex = NonRecursiveBenaphore;
+}
+#else
+#include <mutex>
+namespace tracy
+{
+using TracyMutex = std::mutex;
+}
+#endif
+#endif
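
The platform split in this header follows the tables above: on MSVC std::shared_mutex posts the lowest times even for plain exclusive locking, on Cygwin the standard mutexes degrade badly under contention so the in-house benaphore is kept, and elsewhere std::mutex comes out ahead. Whichever branch is taken, the alias stays drop-in compatible with the standard RAII guards seen in the hunks below; a hypothetical call site for illustration (the include path, names, and counter are not from the commit):

#include <mutex>
#include "../common/TracyMutex.hpp"  // relative path as used from the client/server sources in this commit

namespace
{
tracy::TracyMutex s_exampleLock;  // hypothetical shared-state guard, not part of the commit
int s_exampleCounter = 0;
}

void BumpCounter()
{
    // std::lock_guard only requires lock()/unlock(), which all three alias targets provide.
    std::lock_guard<tracy::TracyMutex> lock( s_exampleLock );
    s_exampleCounter++;
}
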


@@ -9,6 +9,7 @@
#include <stdlib.h>
#include <time.h>
#include "../common/TracyMutex.hpp"
#include "../common/TracySystem.hpp"
#include "tracy_pdqsort.h"
#include "TracyBadVersion.hpp"
@@ -364,7 +365,7 @@ bool View::DrawImpl()
keepOpenPtr = &keepOpen;
}
-std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() );
+std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() );
char tmp[2048];
sprintf( tmp, "%s###Profiler", m_worker.GetCaptureName().c_str() );
ImGui::SetNextWindowSize( ImVec2( 1550, 800 ), ImGuiCond_FirstUseEver );
@@ -437,7 +438,7 @@ void View::DrawConnection()
const auto cs = ty * 0.9f;
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetMbpsDataLock() );
+std::lock_guard<TracyMutex> lock( m_worker.GetMbpsDataLock() );
ImGui::Begin( m_worker.GetAddr().c_str(), nullptr, ImGuiWindowFlags_AlwaysAutoResize );
const auto& mbpsVector = m_worker.GetMbpsData();
const auto mbps = mbpsVector.back();
@@ -461,7 +462,7 @@ void View::DrawConnection()
const auto wpos = ImGui::GetWindowPos() + ImGui::GetWindowContentRegionMin();
ImGui::GetWindowDrawList()->AddCircleFilled( wpos + ImVec2( 1 + cs * 0.5, 3 + ty * 0.5 ), cs * 0.5, m_worker.IsConnected() ? 0xFF2222CC : 0xFF444444, 10 );
-std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() );
+std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() );
{
const auto sz = m_worker.GetFrameCount();
if( sz > 1 )


@@ -8,7 +8,6 @@
#include <thread>
#include <vector>
#include "../common/tracy_benaphore.h"
#include "TracyVector.hpp"
#include "TracyWorker.hpp"
#include "tracy_flat_hash_map.hpp"


@@ -501,7 +501,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } );
#endif
}
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
m_data.sourceLocationZonesReady = true;
} );
#endif
@@ -1031,7 +1031,7 @@ void Worker::Exec()
const char* end = buf + sz;
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
while( ptr < end )
{
auto ev = (const QueueItem*)ptr;
@@ -1049,7 +1049,7 @@ void Worker::Exec()
enum { MbpsUpdateTime = 200 };
if( td > MbpsUpdateTime )
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_mbpsData.lock );
+std::lock_guard<TracyMutex> lock( m_mbpsData.lock );
m_mbpsData.mbps.erase( m_mbpsData.mbps.begin() );
m_mbpsData.mbps.emplace_back( bytes / ( td * 125.f ) );
m_mbpsData.compRatio = float( bytes ) / decBytes;
@@ -2331,7 +2331,7 @@ void Worker::ReconstructMemAllocPlot()
PlotData* plot;
{
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
plot = m_slab.AllocInit<PlotData>();
}
@@ -2413,7 +2413,7 @@ void Worker::ReconstructMemAllocPlot()
plot->min = 0;
plot->max = max;
-std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
+std::lock_guard<TracyMutex> lock( m_data.lock );
m_data.plots.insert( m_data.plots.begin(), plot );
m_data.memory.plot = plot;
}


@@ -9,9 +9,9 @@
#include <thread>
#include <vector>
#include "../common/tracy_benaphore.h"
#include "../common/tracy_lz4.hpp"
#include "../common/TracyForceInline.hpp"
#include "../common/TracyMutex.hpp"
#include "../common/TracyQueue.hpp"
#include "../common/TracySocket.hpp"
#include "tracy_flat_hash_map.hpp"
@@ -75,7 +75,7 @@ class Worker
{
DataBlock() : zonesCnt( 0 ), lastTime( 0 ), frameOffset( 0 ), threadLast( std::numeric_limits<uint64_t>::max(), 0 ) {}
-NonRecursiveBenaphore lock;
+TracyMutex lock;
Vector<int64_t> frames;
Vector<GpuCtxData*> gpuData;
Vector<MessageData*> messages;
@@ -115,7 +115,7 @@ class Worker
{
MbpsBlock() : mbps( 64 ), compRatio( 1.0 ) {}
-NonRecursiveBenaphore lock;
+TracyMutex lock;
std::vector<float> mbps;
float compRatio;
};
@@ -146,7 +146,7 @@ public:
int64_t GetDelay() const { return m_delay; }
int64_t GetResolution() const { return m_resolution; }
-NonRecursiveBenaphore& GetDataLock() { return m_data.lock; }
+TracyMutex& GetDataLock() { return m_data.lock; }
size_t GetFrameCount() const { return m_data.frames.size(); }
int64_t GetLastTime() const { return m_data.lastTime; }
uint64_t GetZoneCount() const { return m_data.zonesCnt; }
@@ -202,7 +202,7 @@ public:
}
tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { assert( thread < m_data.threadExpand.size() ); return m_data.threadExpand[thread]; }
-NonRecursiveBenaphore& GetMbpsDataLock() { return m_mbpsData.lock; }
+TracyMutex& GetMbpsDataLock() { return m_mbpsData.lock; }
const std::vector<float>& GetMbpsData() const { return m_mbpsData.mbps; }
float GetCompRatio() const { return m_mbpsData.compRatio; }


@@ -112,6 +112,7 @@
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
<ClInclude Include="..\..\..\common\TracyMutex.hpp" />
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
<ClInclude Include="..\..\..\common\TracySocket.hpp" />


@@ -179,6 +179,9 @@
<ClInclude Include="..\..\..\server\TracyVarArray.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyMutex.hpp">
<Filter>common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Natvis Include="DebugVis.natvis" />