From d6e6d6bdf298cdbc456e24297c7caf0f15d0234b Mon Sep 17 00:00:00 2001 From: Adam Sawicki Date: Fri, 21 Sep 2018 14:07:02 +0200 Subject: [PATCH] VmaBlockMetadata_Buddy: Introduced concept of m_UsableSize to always use powers of two even when memory block size is not. --- src/Tests.cpp | 4 +-- src/vk_mem_alloc.h | 89 +++++++++++++++++++++++++++++----------------- 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/src/Tests.cpp b/src/Tests.cpp index 0d0d4cb..1ccae95 100644 --- a/src/Tests.cpp +++ b/src/Tests.cpp @@ -4125,7 +4125,8 @@ static void BasicTestBuddyAllocator() VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); assert(res == VK_SUCCESS); - poolCreateInfo.blockSize = 1024 * 1024; + // Deliberately adding 1023 to test usable size smaller than memory block size. + poolCreateInfo.blockSize = 1024 * 1024 + 1023; poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT; //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1; @@ -4160,7 +4161,6 @@ static void BasicTestBuddyAllocator() assert(res == VK_SUCCESS); bufInfo.push_back(newBufInfo); - VmaPoolStats stats = {}; vmaGetPoolStats(g_hAllocator, pool, &stats); int DBG = 0; // Set breakpoint here to inspect `stats`. diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h index ee3b8dc..411b300 100644 --- a/src/vk_mem_alloc.h +++ b/src/vk_mem_alloc.h @@ -4967,6 +4967,12 @@ private: }; /* +- GetSize() is the original size of allocated memory block. +- m_UsableSize is this size aligned down to a power of two. + All allocations and calculations happen relative to m_UsableSize. +- GetUnusableSize() is the difference between them. + It is reported as separate unused range. + Level 0 has block size = GetSize(). Level 1 has block size = GetSize() / 2 and so on... 
*/ class VmaBlockMetadata_Buddy : public VmaBlockMetadata @@ -4979,7 +4985,7 @@ public: virtual bool Validate() const; virtual size_t GetAllocationCount() const { return m_AllocationCount; } - virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize; } + virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize + GetUnusableSize(); } virtual VkDeviceSize GetUnusedRangeSizeMax() const; virtual bool IsEmpty() const { return m_Root->type == Node::TYPE_FREE; } @@ -5067,6 +5073,8 @@ private: }; }; + // Size of the memory block aligned down to a power of two. + VkDeviceSize m_UsableSize; Node* m_Root; struct { Node* front; @@ -5076,13 +5084,14 @@ private: size_t m_AllocationCount; // Number of nodes in the tree with type == TYPE_FREE. size_t m_FreeCount; - // This includes space wasted due to internal fragmentation. + // This includes space wasted due to internal fragmentation. Doesn't include unusable size. VkDeviceSize m_SumFreeSize; + VkDeviceSize GetUnusableSize() const { return GetSize() - m_UsableSize; } void DeleteNode(Node* node); bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const; uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const; - VkDeviceSize LevelToNodeSize(uint32_t level) const; + inline VkDeviceSize LevelToNodeSize(uint32_t level) const { return m_UsableSize >> level; } // Alloc passed just for validation. Can be null. void FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset); void CalcAllocationStatInfoNode(VmaStatInfo& outInfo, const Node* node, VkDeviceSize levelNodeSize) const; @@ -9218,7 +9227,8 @@ void VmaBlockMetadata_Buddy::Init(VkDeviceSize size) { VmaBlockMetadata::Init(size); - m_SumFreeSize = size; + m_UsableSize = VmaPrevPow2(size); + m_SumFreeSize = m_UsableSize; Node* rootNode = new Node(); rootNode->offset = 0; @@ -9234,7 +9244,7 @@ bool VmaBlockMetadata_Buddy::Validate() const { // Validate tree. 
ValidationContext ctx; - if(!ValidateNode(ctx, VMA_NULL, m_Root, 0, GetSize())) + if(!ValidateNode(ctx, VMA_NULL, m_Root, 0, LevelToNodeSize(0))) { VMA_VALIDATE(false && "ValidateNode failed."); } @@ -9269,12 +9279,11 @@ bool VmaBlockMetadata_Buddy::Validate() const VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const { - VkDeviceSize levelNodeSize = GetSize(); - for(uint32_t level = 0; level < MAX_LEVELS; ++level, levelNodeSize /= 2) + for(uint32_t level = 0; level < MAX_LEVELS; ++level) { if(m_FreeList[level].front != VMA_NULL) { - return levelNodeSize; + return LevelToNodeSize(level); } } return 0; @@ -9282,6 +9291,8 @@ VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { + const VkDeviceSize unusableSize = GetUnusableSize(); + outInfo.blockCount = 1; outInfo.allocationCount = outInfo.unusedRangeCount = 0; @@ -9291,16 +9302,32 @@ void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const outInfo.allocationSizeMin = outInfo.unusedRangeSizeMin = UINT64_MAX; outInfo.allocationSizeAvg = outInfo.unusedRangeSizeAvg = 0; // Unused. 
- CalcAllocationStatInfoNode(outInfo, m_Root, GetSize()); + CalcAllocationStatInfoNode(outInfo, m_Root, LevelToNodeSize(0)); + + if(unusableSize > 0) + { + ++outInfo.unusedRangeCount; + outInfo.unusedBytes += unusableSize; + outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, unusableSize); + outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusableSize); + } } void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const { + const VkDeviceSize unusableSize = GetUnusableSize(); + inoutStats.size += GetSize(); - inoutStats.unusedSize += m_SumFreeSize; + inoutStats.unusedSize += m_SumFreeSize + unusableSize; inoutStats.allocationCount += m_AllocationCount; inoutStats.unusedRangeCount += m_FreeCount; inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, GetUnusedRangeSizeMax()); + + if(unusableSize > 0) + { + ++inoutStats.unusedRangeCount; + // Not updating inoutStats.unusedRangeSizeMax with unusableSize because this space is not available for allocations. 
+ } } #if VMA_STATS_STRING_ENABLED @@ -9317,7 +9344,15 @@ void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json) const stat.allocationCount, stat.unusedRangeCount); - PrintDetailedMapNode(json, m_Root, GetSize()); + PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0)); + + const VkDeviceSize unusableSize = GetUnusableSize(); + if(unusableSize > 0) + { + PrintDetailedMap_UnusedRange(json, + m_UsableSize, // offset + unusableSize); // size + } PrintDetailedMap_End(json); } @@ -9338,8 +9373,7 @@ bool VmaBlockMetadata_Buddy::CreateAllocationRequest( { VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); - const VkDeviceSize size = GetSize(); - if(allocSize > size) + if(allocSize > m_UsableSize) { return false; } @@ -9428,7 +9462,7 @@ void VmaBlockMetadata_Buddy::Alloc( AddToFreeListFront(childrenLevel, leftChild); ++m_FreeCount; - m_SumFreeSize -= LevelToNodeSize(currLevel) % 2; // Useful only when level node sizes can be non power of 2. + //m_SumFreeSize -= LevelToNodeSize(currLevel) % 2; // Useful only when level node sizes can be non power of 2. ++currLevel; currNode = m_FreeList[currLevel].front; } @@ -9502,40 +9536,29 @@ bool VmaBlockMetadata_Buddy::ValidateNode(ValidationContext& ctx, const Node* pa uint32_t VmaBlockMetadata_Buddy::AllocSizeToLevel(VkDeviceSize allocSize) const { - // TODO optimize + // I know this could be optimized somehow e.g. by using std::log2p1 from C++20. 
uint32_t level = 0; - VkDeviceSize currLevelNodeSize = GetSize(); - VkDeviceSize nextLevelNodeSize = currLevelNodeSize / 2; + VkDeviceSize currLevelNodeSize = m_UsableSize; + VkDeviceSize nextLevelNodeSize = currLevelNodeSize >> 1; while(allocSize <= nextLevelNodeSize && level + 1 < MAX_LEVELS) { ++level; currLevelNodeSize = nextLevelNodeSize; - nextLevelNodeSize = currLevelNodeSize / 2; + nextLevelNodeSize = currLevelNodeSize >> 1; } return level; } -VkDeviceSize VmaBlockMetadata_Buddy::LevelToNodeSize(uint32_t level) const -{ - // TODO optimize - VkDeviceSize result = GetSize(); - for(uint32_t i = 0; i < level; ++i) - { - result /= 2; - } - return result; -} - void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset) { // Find node and level. Node* node = m_Root; VkDeviceSize nodeOffset = 0; uint32_t level = 0; - VkDeviceSize levelSize = GetSize(); + VkDeviceSize levelNodeSize = LevelToNodeSize(0); while(node->type == Node::TYPE_SPLIT) { - const VkDeviceSize nextLevelSize = levelSize / 2; + const VkDeviceSize nextLevelSize = levelNodeSize >> 1; if(offset < nodeOffset + nextLevelSize) { node = node->split.leftChild; @@ -9546,7 +9569,7 @@ void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offs nodeOffset += nextLevelSize; } ++level; - levelSize = nextLevelSize; + levelNodeSize = nextLevelSize; } VMA_ASSERT(node != VMA_NULL && node->type == Node::TYPE_ALLOCATION); @@ -9570,7 +9593,7 @@ void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offs node = parent; --level; - m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2. + //m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2. --m_FreeCount; }