VmaBlockMetadata_Buddy: Introduced concept of m_UsableSize to always use powers of two even when memory block size is not.

This commit is contained in:
Adam Sawicki 2018-09-21 14:07:02 +02:00
parent a79d2746f1
commit d6e6d6bdf2
2 changed files with 58 additions and 35 deletions

View File

@ -4125,7 +4125,8 @@ static void BasicTestBuddyAllocator()
VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
assert(res == VK_SUCCESS); assert(res == VK_SUCCESS);
poolCreateInfo.blockSize = 1024 * 1024; // Deliberately adding 1023 to test usable size smaller than memory block size.
poolCreateInfo.blockSize = 1024 * 1024 + 1023;
poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT; poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
//poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1; //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
@ -4160,7 +4161,6 @@ static void BasicTestBuddyAllocator()
assert(res == VK_SUCCESS); assert(res == VK_SUCCESS);
bufInfo.push_back(newBufInfo); bufInfo.push_back(newBufInfo);
VmaPoolStats stats = {}; VmaPoolStats stats = {};
vmaGetPoolStats(g_hAllocator, pool, &stats); vmaGetPoolStats(g_hAllocator, pool, &stats);
int DBG = 0; // Set breakpoint here to inspect `stats`. int DBG = 0; // Set breakpoint here to inspect `stats`.

View File

@ -4967,6 +4967,12 @@ private:
}; };
/* /*
- GetSize() is the original size of allocated memory block.
- m_UsableSize is this size aligned down to a power of two.
All allocations and calculations happen relative to m_UsableSize.
- GetUnusableSize() is the difference between them.
It is reported as a separate unused range.
Level 0 has block size = GetSize(). Level 1 has block size = GetSize() / 2 and so on... Level 0 has block size = GetSize(). Level 1 has block size = GetSize() / 2 and so on...
*/ */
class VmaBlockMetadata_Buddy : public VmaBlockMetadata class VmaBlockMetadata_Buddy : public VmaBlockMetadata
@ -4979,7 +4985,7 @@ public:
virtual bool Validate() const; virtual bool Validate() const;
virtual size_t GetAllocationCount() const { return m_AllocationCount; } virtual size_t GetAllocationCount() const { return m_AllocationCount; }
virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize; } virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize + GetUnusableSize(); }
virtual VkDeviceSize GetUnusedRangeSizeMax() const; virtual VkDeviceSize GetUnusedRangeSizeMax() const;
virtual bool IsEmpty() const { return m_Root->type == Node::TYPE_FREE; } virtual bool IsEmpty() const { return m_Root->type == Node::TYPE_FREE; }
@ -5067,6 +5073,8 @@ private:
}; };
}; };
// Size of the memory block aligned down to a power of two.
VkDeviceSize m_UsableSize;
Node* m_Root; Node* m_Root;
struct { struct {
Node* front; Node* front;
@ -5076,13 +5084,14 @@ private:
size_t m_AllocationCount; size_t m_AllocationCount;
// Number of nodes in the tree with type == TYPE_FREE. // Number of nodes in the tree with type == TYPE_FREE.
size_t m_FreeCount; size_t m_FreeCount;
// This includes space wasted due to internal fragmentation. // This includes space wasted due to internal fragmentation. Doesn't include unusable size.
VkDeviceSize m_SumFreeSize; VkDeviceSize m_SumFreeSize;
VkDeviceSize GetUnusableSize() const { return GetSize() - m_UsableSize; }
void DeleteNode(Node* node); void DeleteNode(Node* node);
bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const; bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const;
uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const; uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const;
VkDeviceSize LevelToNodeSize(uint32_t level) const; inline VkDeviceSize LevelToNodeSize(uint32_t level) const { return m_UsableSize >> level; }
// Alloc passed just for validation. Can be null. // Alloc passed just for validation. Can be null.
void FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset); void FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset);
void CalcAllocationStatInfoNode(VmaStatInfo& outInfo, const Node* node, VkDeviceSize levelNodeSize) const; void CalcAllocationStatInfoNode(VmaStatInfo& outInfo, const Node* node, VkDeviceSize levelNodeSize) const;
@ -9218,7 +9227,8 @@ void VmaBlockMetadata_Buddy::Init(VkDeviceSize size)
{ {
VmaBlockMetadata::Init(size); VmaBlockMetadata::Init(size);
m_SumFreeSize = size; m_UsableSize = VmaPrevPow2(size);
m_SumFreeSize = m_UsableSize;
Node* rootNode = new Node(); Node* rootNode = new Node();
rootNode->offset = 0; rootNode->offset = 0;
@ -9234,7 +9244,7 @@ bool VmaBlockMetadata_Buddy::Validate() const
{ {
// Validate tree. // Validate tree.
ValidationContext ctx; ValidationContext ctx;
if(!ValidateNode(ctx, VMA_NULL, m_Root, 0, GetSize())) if(!ValidateNode(ctx, VMA_NULL, m_Root, 0, LevelToNodeSize(0)))
{ {
VMA_VALIDATE(false && "ValidateNode failed."); VMA_VALIDATE(false && "ValidateNode failed.");
} }
@ -9269,12 +9279,11 @@ bool VmaBlockMetadata_Buddy::Validate() const
VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const
{ {
VkDeviceSize levelNodeSize = GetSize(); for(uint32_t level = 0; level < MAX_LEVELS; ++level)
for(uint32_t level = 0; level < MAX_LEVELS; ++level, levelNodeSize /= 2)
{ {
if(m_FreeList[level].front != VMA_NULL) if(m_FreeList[level].front != VMA_NULL)
{ {
return levelNodeSize; return LevelToNodeSize(level);
} }
} }
return 0; return 0;
@ -9282,6 +9291,8 @@ VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const
void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const
{ {
const VkDeviceSize unusableSize = GetUnusableSize();
outInfo.blockCount = 1; outInfo.blockCount = 1;
outInfo.allocationCount = outInfo.unusedRangeCount = 0; outInfo.allocationCount = outInfo.unusedRangeCount = 0;
@ -9291,16 +9302,32 @@ void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const
outInfo.allocationSizeMin = outInfo.unusedRangeSizeMin = UINT64_MAX; outInfo.allocationSizeMin = outInfo.unusedRangeSizeMin = UINT64_MAX;
outInfo.allocationSizeAvg = outInfo.unusedRangeSizeAvg = 0; // Unused. outInfo.allocationSizeAvg = outInfo.unusedRangeSizeAvg = 0; // Unused.
CalcAllocationStatInfoNode(outInfo, m_Root, GetSize()); CalcAllocationStatInfoNode(outInfo, m_Root, LevelToNodeSize(0));
if(unusableSize > 0)
{
++outInfo.unusedRangeCount;
outInfo.unusedBytes += unusableSize;
outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, unusableSize);
outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusableSize);
}
} }
void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const
{ {
const VkDeviceSize unusableSize = GetUnusableSize();
inoutStats.size += GetSize(); inoutStats.size += GetSize();
inoutStats.unusedSize += m_SumFreeSize; inoutStats.unusedSize += m_SumFreeSize + unusableSize;
inoutStats.allocationCount += m_AllocationCount; inoutStats.allocationCount += m_AllocationCount;
inoutStats.unusedRangeCount += m_FreeCount; inoutStats.unusedRangeCount += m_FreeCount;
inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, GetUnusedRangeSizeMax()); inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, GetUnusedRangeSizeMax());
if(unusableSize > 0)
{
++inoutStats.unusedRangeCount;
// Not updating inoutStats.unusedRangeSizeMax with unusableSize because this space is not available for allocations.
}
} }
#if VMA_STATS_STRING_ENABLED #if VMA_STATS_STRING_ENABLED
@ -9317,7 +9344,15 @@ void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json) const
stat.allocationCount, stat.allocationCount,
stat.unusedRangeCount); stat.unusedRangeCount);
PrintDetailedMapNode(json, m_Root, GetSize()); PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0));
const VkDeviceSize unusableSize = GetUnusableSize();
if(unusableSize > 0)
{
PrintDetailedMap_UnusedRange(json,
m_UsableSize, // offset
unusableSize); // size
}
PrintDetailedMap_End(json); PrintDetailedMap_End(json);
} }
@ -9338,8 +9373,7 @@ bool VmaBlockMetadata_Buddy::CreateAllocationRequest(
{ {
VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm.");
const VkDeviceSize size = GetSize(); if(allocSize > m_UsableSize)
if(allocSize > size)
{ {
return false; return false;
} }
@ -9428,7 +9462,7 @@ void VmaBlockMetadata_Buddy::Alloc(
AddToFreeListFront(childrenLevel, leftChild); AddToFreeListFront(childrenLevel, leftChild);
++m_FreeCount; ++m_FreeCount;
m_SumFreeSize -= LevelToNodeSize(currLevel) % 2; // Useful only when level node sizes can be non power of 2. //m_SumFreeSize -= LevelToNodeSize(currLevel) % 2; // Useful only when level node sizes can be non power of 2.
++currLevel; ++currLevel;
currNode = m_FreeList[currLevel].front; currNode = m_FreeList[currLevel].front;
} }
@ -9502,40 +9536,29 @@ bool VmaBlockMetadata_Buddy::ValidateNode(ValidationContext& ctx, const Node* pa
uint32_t VmaBlockMetadata_Buddy::AllocSizeToLevel(VkDeviceSize allocSize) const uint32_t VmaBlockMetadata_Buddy::AllocSizeToLevel(VkDeviceSize allocSize) const
{ {
// TODO optimize // I know this could be optimized somehow e.g. by using std::log2p1 from C++20.
uint32_t level = 0; uint32_t level = 0;
VkDeviceSize currLevelNodeSize = GetSize(); VkDeviceSize currLevelNodeSize = m_UsableSize;
VkDeviceSize nextLevelNodeSize = currLevelNodeSize / 2; VkDeviceSize nextLevelNodeSize = currLevelNodeSize >> 1;
while(allocSize <= nextLevelNodeSize && level + 1 < MAX_LEVELS) while(allocSize <= nextLevelNodeSize && level + 1 < MAX_LEVELS)
{ {
++level; ++level;
currLevelNodeSize = nextLevelNodeSize; currLevelNodeSize = nextLevelNodeSize;
nextLevelNodeSize = currLevelNodeSize / 2; nextLevelNodeSize = currLevelNodeSize >> 1;
} }
return level; return level;
} }
VkDeviceSize VmaBlockMetadata_Buddy::LevelToNodeSize(uint32_t level) const
{
// TODO optimize
VkDeviceSize result = GetSize();
for(uint32_t i = 0; i < level; ++i)
{
result /= 2;
}
return result;
}
void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset) void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset)
{ {
// Find node and level. // Find node and level.
Node* node = m_Root; Node* node = m_Root;
VkDeviceSize nodeOffset = 0; VkDeviceSize nodeOffset = 0;
uint32_t level = 0; uint32_t level = 0;
VkDeviceSize levelSize = GetSize(); VkDeviceSize levelNodeSize = LevelToNodeSize(0);
while(node->type == Node::TYPE_SPLIT) while(node->type == Node::TYPE_SPLIT)
{ {
const VkDeviceSize nextLevelSize = levelSize / 2; const VkDeviceSize nextLevelSize = levelNodeSize >> 1;
if(offset < nodeOffset + nextLevelSize) if(offset < nodeOffset + nextLevelSize)
{ {
node = node->split.leftChild; node = node->split.leftChild;
@ -9546,7 +9569,7 @@ void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offs
nodeOffset += nextLevelSize; nodeOffset += nextLevelSize;
} }
++level; ++level;
levelSize = nextLevelSize; levelNodeSize = nextLevelSize;
} }
VMA_ASSERT(node != VMA_NULL && node->type == Node::TYPE_ALLOCATION); VMA_ASSERT(node != VMA_NULL && node->type == Node::TYPE_ALLOCATION);
@ -9570,7 +9593,7 @@ void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offs
node = parent; node = parent;
--level; --level;
m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2. //m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2.
--m_FreeCount; --m_FreeCount;
} }