Do not store tail index in memory.

2024-11-22 22:44:34 +00:00 · 2017-10-03 14:50:55 +02:00 · 2017-10-03 14:50:55 +02:00 · ba037e5798
commit ba037e5798
parent dbb90e51b0
4 changed files with 52 additions and 42 deletions
--- a/client/TracyProfiler.cpp
+++ b/client/TracyProfiler.cpp
@@ -280,20 +280,22 @@ void Profiler::CalibrateDelay()
        static const tracy::SourceLocation __tracy_source_location { __FUNCTION__,  __FILE__, (uint32_t)__LINE__, 0 };
        const auto id = GetNewId();
        {
-            auto item = s_queue.enqueue_begin( ptoken );
+            Magic magic;
+            auto item = s_queue.enqueue_begin( ptoken, magic );
            item->hdr.type = QueueType::ZoneBegin;
            item->hdr.id = id;
            item->zoneBegin.time = GetTime( item->zoneBegin.cpu );
            item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location;
            item->zoneBegin.thread = GetThreadHandle();
-            s_queue.enqueue_finish( ptoken );
+            s_queue.enqueue_finish( ptoken, magic );
        }
        {
-            auto item = s_queue.enqueue_begin( ptoken );
+            Magic magic;
+            auto item = s_queue.enqueue_begin( ptoken, magic );
            item->hdr.type = QueueType::ZoneEnd;
            item->hdr.id = id;
            item->zoneEnd.time = GetTime( item->zoneEnd.cpu );
-            s_queue.enqueue_finish( ptoken );
+            s_queue.enqueue_finish( ptoken, magic );
        }
    }
    const auto f0 = GetTime( cpu );
@@ -308,20 +310,22 @@ void Profiler::CalibrateDelay()
        static const tracy::SourceLocation __tracy_source_location { __FUNCTION__,  __FILE__, (uint32_t)__LINE__, 0 };
        const auto id = GetNewId();
        {
-            auto item = s_queue.enqueue_begin( ptoken );
+            Magic magic;
+            auto item = s_queue.enqueue_begin( ptoken, magic );
            item->hdr.type = QueueType::ZoneBegin;
            item->hdr.id = id;
            item->zoneBegin.time = GetTime( item->zoneBegin.cpu );
            item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location;
            item->zoneBegin.thread = GetThreadHandle();
-            s_queue.enqueue_finish( ptoken );
+            s_queue.enqueue_finish( ptoken, magic );
        }
        {
-            auto item = s_queue.enqueue_begin( ptoken );
+            Magic magic;
+            auto item = s_queue.enqueue_begin( ptoken, magic );
            item->hdr.type = QueueType::ZoneEnd;
            item->hdr.id = id;
            item->zoneEnd.time = GetTime( item->zoneEnd.cpu );
-            s_queue.enqueue_finish( ptoken );
+            s_queue.enqueue_finish( ptoken, magic );
        }
    }
    const auto t1 = GetTime( cpu );
--- a/client/TracyProfiler.hpp
+++ b/client/TracyProfiler.hpp
@@ -31,6 +31,8 @@ extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
 extern thread_local moodycamel::ProducerToken s_token;;
 extern std::atomic<uint64_t> s_id;

+using Magic = moodycamel::ConcurrentQueueDefaultTraits::index_t;
+
 class Profiler
 {
 public:
@@ -50,17 +52,18 @@ public:
 #endif
    }

-    static QueueItem* StartItem() { return s_queue.enqueue_begin( s_token ); }
-    static void FinishItem() { s_queue.enqueue_finish( s_token ); }
+    static QueueItem* StartItem( Magic& magic ) { return s_queue.enqueue_begin( s_token, magic ); }
+    static void FinishItem( Magic magic ) { s_queue.enqueue_finish( s_token, magic ); }
    static uint64_t GetNewId() { return s_id.fetch_add( 1, std::memory_order_relaxed ); }

    static void FrameMark()
    {
        int8_t cpu;
-        auto item = s_queue.enqueue_begin( s_token );
+        Magic magic;
+        auto item = s_queue.enqueue_begin( s_token, magic );
        item->hdr.type = QueueType::FrameMarkMsg;
        item->hdr.id = (uint64_t)GetTime( cpu );
-        s_queue.enqueue_finish( s_token );
+        s_queue.enqueue_finish( s_token, magic );
    }

    static bool ShouldExit();
--- a/client/TracyScoped.hpp
+++ b/client/TracyScoped.hpp
@@ -16,43 +16,47 @@ public:
    ScopedZone( const SourceLocation* srcloc )
        : m_id( Profiler::GetNewId() )
    {
-        auto item = Profiler::StartItem();
+        Magic magic;
+        auto item = Profiler::StartItem( magic );
        item->hdr.type = QueueType::ZoneBegin;
        item->hdr.id = m_id;
        item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu );
        item->zoneBegin.srcloc = (uint64_t)srcloc;
        item->zoneBegin.thread = GetThreadHandle();
-        Profiler::FinishItem();
+        Profiler::FinishItem( magic );
    }

    ~ScopedZone()
    {
-        auto item = Profiler::StartItem();
+        Magic magic;
+        auto item = Profiler::StartItem( magic );
        item->hdr.type = QueueType::ZoneEnd;
        item->hdr.id = m_id;
        item->zoneEnd.time = Profiler::GetTime( item->zoneEnd.cpu );
-        Profiler::FinishItem();
+        Profiler::FinishItem( magic );
    }

    void Text( const char* txt, size_t size )
    {
+        Magic magic;
        auto ptr = new char[size+1];
        memcpy( ptr, txt, size );
        ptr[size] = '\0';
-        auto item = Profiler::StartItem();
+        auto item = Profiler::StartItem( magic );
        item->hdr.type = QueueType::ZoneText;
        item->hdr.id = m_id;
        item->zoneText.text = (uint64_t)ptr;
-        Profiler::FinishItem();
+        Profiler::FinishItem( magic );
    }

    void Name( const char* name )
    {
-        auto item = Profiler::StartItem();
+        Magic magic;
+        auto item = Profiler::StartItem( magic );
        item->hdr.type = QueueType::ZoneName;
        item->hdr.id = m_id;
        item->zoneName.name = (uint64_t)name;
-        Profiler::FinishItem();
+        Profiler::FinishItem( magic );
    }

 private:
--- a/client/concurrentqueue.h
+++ b/client/concurrentqueue.h
@@ -951,14 +951,14 @@ public:
 		return inner_enqueue<CanAlloc>(token, std::move(item));
 	}

-    force_inline T* enqueue_begin(producer_token_t const& token)
+    force_inline T* enqueue_begin(producer_token_t const& token, index_t& currentTailIndex)
    {
-        return inner_enqueue_begin<CanAlloc>(token);
+        return inner_enqueue_begin<CanAlloc>(token, currentTailIndex);
    }

-    force_inline void enqueue_finish(producer_token_t const& token)
+    force_inline void enqueue_finish(producer_token_t const& token, index_t currentTailIndex)
    {
-        inner_enqueue_finish(token);
+        inner_enqueue_finish(token, currentTailIndex);
    }

 	// Enqueues several items.
@@ -1301,14 +1301,14 @@ private:
 	}

    template<AllocationMode canAlloc>
-    force_inline T* inner_enqueue_begin(producer_token_t const& token)
+    force_inline T* inner_enqueue_begin(producer_token_t const& token, index_t& currentTailIndex)
    {
-        return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_begin<canAlloc>();
+        return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_begin<canAlloc>(currentTailIndex);
    }

-    force_inline void inner_enqueue_finish(producer_token_t const& token)
+    force_inline void inner_enqueue_finish(producer_token_t const& token, index_t currentTailIndex)
    {
-        return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_finish();
+        return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_finish(currentTailIndex);
    }

 	template<AllocationMode canAlloc, typename U>
@@ -1909,7 +1909,7 @@ private:
 		}

        template<AllocationMode allocMode>
-        inline void enqueue_begin_alloc()
+        inline void enqueue_begin_alloc(index_t currentTailIndex)
        {
            // We reached the end of a block, start a new one
            if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
@@ -1928,9 +1928,9 @@ private:
                // and <= its current value. Since we have the most recent tail, the head must be
                // <= to it.
                auto head = this->headIndex.load(std::memory_order_relaxed);
-                assert(!details::circular_less_than<index_t>(pr_currentTailIndex, head));
-                if (!details::circular_less_than<index_t>(head, pr_currentTailIndex + BLOCK_SIZE)
-                    || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < pr_currentTailIndex - head))) {
+                assert(!details::circular_less_than<index_t>(currentTailIndex, head));
+                if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
+                    || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
                    // We can't enqueue in another block because there's not enough leeway -- the
                    // tail could surpass the head by the time the block fills up! (Or we'll exceed
                    // the size limit, if the second part of the condition was true.)
@@ -1969,28 +1969,28 @@ private:

            // Add block to block index
            auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
-            entry.base = pr_currentTailIndex;
+            entry.base = currentTailIndex;
            entry.block = this->tailBlock;
            blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
            pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
        }

        template<AllocationMode allocMode>
-        force_inline T* enqueue_begin()
+        force_inline T* enqueue_begin(index_t& currentTailIndex)
        {
-            pr_currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
-            if ((pr_currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
-                return (*this->tailBlock)[pr_currentTailIndex];
+            currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
+            if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
+                return (*this->tailBlock)[currentTailIndex];
            }
            else {
-                this->enqueue_begin_alloc<allocMode>();
-                return (*this->tailBlock)[pr_currentTailIndex];
+                this->enqueue_begin_alloc<allocMode>(currentTailIndex);
+                return (*this->tailBlock)[currentTailIndex];
            }
        }

-        force_inline void enqueue_finish()
+        force_inline void enqueue_finish(index_t currentTailIndex)
        {
-            this->tailIndex.store(pr_currentTailIndex + 1, std::memory_order_release);
+            this->tailIndex.store(currentTailIndex + 1, std::memory_order_release);
        }

 		template<typename U>
@@ -2430,7 +2430,6 @@ private:
 		std::atomic<BlockIndexHeader*> blockIndex;
 		
 		// To be used by producer only -- consumer must use the ones in referenced by blockIndex
-        index_t pr_currentTailIndex;
 		size_t pr_blockIndexSlotsUsed;
 		size_t pr_blockIndexSize;
 		size_t pr_blockIndexFront;		// Next slot (not current)