diff --git a/src/Common.h b/src/Common.h
index d034196..111ccde 100644
--- a/src/Common.h
+++ b/src/Common.h
@@ -206,6 +206,20 @@ private:
     uint32_t GenerateFast() { return m_Value = (m_Value * 196314165 + 907633515); }
 };
 
+// Adapter exposing RandomNumberGenerator through the STL UniformRandomBitGenerator interface (e.g. for std::shuffle).
+// Stores only a reference: the wrapped generator must outlive this object.
+struct MyUniformRandomNumberGenerator
+{
+    typedef uint32_t result_type;
+    explicit MyUniformRandomNumberGenerator(RandomNumberGenerator& gen) : m_Gen(gen) { }
+    static constexpr uint32_t min() { return 0; } // Must be a constant expression per UniformRandomBitGenerator.
+    static constexpr uint32_t max() { return UINT32_MAX; }
+    uint32_t operator()() { return m_Gen.Generate(); }
+
+private:
+    RandomNumberGenerator& m_Gen;
+};
+
 void ReadFile(std::vector<char>& out, const char* fileName);
 
 enum class CONSOLE_COLOR
diff --git a/src/Tests.cpp b/src/Tests.cpp
index bc70869..36e86bc 100644
--- a/src/Tests.cpp
+++ b/src/Tests.cpp
@@ -7,8 +7,29 @@
 
 #ifdef _WIN32
 
+enum CONFIG_TYPE {
+    CONFIG_TYPE_MINIMUM,
+    CONFIG_TYPE_SMALL,
+    CONFIG_TYPE_AVERAGE,
+    CONFIG_TYPE_LARGE,
+    CONFIG_TYPE_MAXIMUM,
+    CONFIG_TYPE_COUNT
+};
+
+static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
+//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
+
 enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
 
+// Printable names of FREE_ORDER values, indexed by the enumerator's numeric value.
+static const wchar_t* FREE_ORDER_NAMES[] = {
+    L"FORWARD",
+    L"BACKWARD",
+    L"RANDOM",
+};
+static_assert(sizeof(FREE_ORDER_NAMES) / sizeof(FREE_ORDER_NAMES[0]) == (size_t)FREE_ORDER::COUNT,
+    "FREE_ORDER_NAMES must have one entry per FREE_ORDER value.");
+
 struct AllocationSize
 {
     uint32_t Probability;
@@ -1948,6 +1969,177 @@ static void ManuallyTestLinearAllocator()
     vmaDestroyPool(g_hAllocator, pool);
 }
 
+// Runs one benchmark configuration on a dedicated single-block custom pool and
+// prints the accumulated time spent in vmaAllocateMemory and vmaFreeMemory.
+// linear    - create the pool with VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
+// empty     - if false, the pool is first filled to about half of its block size
+//             and a randomly chosen half of those allocations is freed again.
+// freeOrder - order in which the measured allocations are freed.
+static void BenchmarkLinearAllocatorCase(bool linear, bool empty, FREE_ORDER freeOrder)
+{
+    RandomNumberGenerator rand{16223};
+
+    const VkDeviceSize bufSizeMin = 32;
+    const VkDeviceSize bufSizeMax = 1024;
+    const size_t maxBufCapacity = 10000;
+    const uint32_t iterationCount = 10;
+
+    VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+    sampleBufCreateInfo.size = bufSizeMax;
+    sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+
+    VmaAllocationCreateInfo sampleAllocCreateInfo = {};
+    sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+
+    VmaPoolCreateInfo poolCreateInfo = {};
+    VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
+    assert(res == VK_SUCCESS);
+
+    poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
+    if(linear)
+        poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
+    poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
+
+    VmaPool pool = nullptr;
+    res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
+    assert(res == VK_SUCCESS);
+
+    // Buffer created just to get memory requirements. Never bound to any memory.
+    VkBuffer dummyBuffer = VK_NULL_HANDLE;
+    res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, nullptr, &dummyBuffer);
+    assert(res == VK_SUCCESS && dummyBuffer);
+
+    VkMemoryRequirements memReq = {};
+    vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
+
+    vkDestroyBuffer(g_hDevice, dummyBuffer, nullptr);
+
+    VmaAllocationCreateInfo allocCreateInfo = {};
+    allocCreateInfo.pool = pool;
+
+    VmaAllocation alloc = VK_NULL_HANDLE;
+    std::vector<VmaAllocation> baseAllocations;
+
+    if(!empty)
+    {
+        // Make allocations up to half of pool size.
+        VkDeviceSize totalSize = 0;
+        while(totalSize < poolCreateInfo.blockSize / 2)
+        {
+            memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
+            res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
+            assert(res == VK_SUCCESS);
+            baseAllocations.push_back(alloc);
+            totalSize += memReq.size;
+        }
+
+        // Delete half of them, choose randomly.
+        size_t allocsToDelete = baseAllocations.size() / 2;
+        for(size_t i = 0; i < allocsToDelete; ++i)
+        {
+            const size_t index = (size_t)rand.Generate() % baseAllocations.size();
+            vmaFreeMemory(g_hAllocator, baseAllocations[index]);
+            baseAllocations.erase(baseAllocations.begin() + index);
+        }
+    }
+
+    // BENCHMARK
+    const size_t allocCount = maxBufCapacity / 2;
+    std::vector<VmaAllocation> testAllocations;
+    testAllocations.reserve(allocCount);
+    duration allocTotalDuration = duration::zero();
+    duration freeTotalDuration = duration::zero();
+    for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
+    {
+        // Allocations
+        time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
+        for(size_t i = 0; i < allocCount; ++i)
+        {
+            memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
+            res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
+            assert(res == VK_SUCCESS);
+            testAllocations.push_back(alloc);
+        }
+        allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
+
+        // Deallocations. Reordering happens before the timer starts, so it is not measured.
+        switch(freeOrder)
+        {
+        case FREE_ORDER::FORWARD:
+            // Leave testAllocations unchanged.
+            break;
+        case FREE_ORDER::BACKWARD:
+            std::reverse(testAllocations.begin(), testAllocations.end());
+            break;
+        case FREE_ORDER::RANDOM:
+            std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
+            break;
+        default: assert(0);
+        }
+
+        time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
+        for(size_t i = 0; i < allocCount; ++i)
+            vmaFreeMemory(g_hAllocator, testAllocations[i]);
+        freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
+
+        testAllocations.clear();
+    }
+
+    // Delete baseAllocations
+    while(!baseAllocations.empty())
+    {
+        vmaFreeMemory(g_hAllocator, baseAllocations.back());
+        baseAllocations.pop_back();
+    }
+
+    vmaDestroyPool(g_hAllocator, pool);
+
+    wprintf(L" LinearAlgorithm=%u %s FreeOrder=%s: allocations %g s, free %g s\n",
+        linear ? 1u : 0u,
+        empty ? L"Empty" : L"Not empty",
+        FREE_ORDER_NAMES[(size_t)freeOrder],
+        ToFloatSeconds(allocTotalDuration),
+        ToFloatSeconds(freeTotalDuration));
+}
+
+// Runs BenchmarkLinearAllocatorCase for every combination of free order, pool fill
+// state and pool algorithm flag (unset, then linear) that ConfigType enables.
+static void BenchmarkLinearAllocator()
+{
+    wprintf(L"Benchmark linear allocator\n");
+
+    uint32_t freeOrderCount = 1;
+    if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
+        freeOrderCount = 3;
+    else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
+        freeOrderCount = 2;
+
+    const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
+
+    for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
+    {
+        FREE_ORDER freeOrder = FREE_ORDER::COUNT;
+        switch(freeOrderIndex)
+        {
+        case 0: freeOrder = FREE_ORDER::BACKWARD; break;
+        case 1: freeOrder = FREE_ORDER::FORWARD; break;
+        case 2: freeOrder = FREE_ORDER::RANDOM; break;
+        default: assert(0);
+        }
+
+        for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
+        {
+            for(uint32_t linearIndex = 0; linearIndex < 2; ++linearIndex)
+            {
+                BenchmarkLinearAllocatorCase(
+                    linearIndex != 0, // linear
+                    emptyIndex == 0, // empty
+                    freeOrder); // freeOrder
+            }
+        }
+    }
+}
+
 static void TestPool_SameSize()
 {
     const VkDeviceSize BUF_SIZE = 1024 * 1024;
@@ -3194,17 +3386,6 @@ static void PerformCustomPoolTest(FILE* file)
     WritePoolTestResult(file, "Code desc", "Test desc", config, result);
 }
 
-enum CONFIG_TYPE {
-    CONFIG_TYPE_MINIMUM,
-    CONFIG_TYPE_SMALL,
-    CONFIG_TYPE_AVERAGE,
-    CONFIG_TYPE_LARGE,
-    CONFIG_TYPE_MAXIMUM,
-    CONFIG_TYPE_COUNT
-};
-
-static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
-//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
 static const char* CODE_DESCRIPTION = "Foo";
 
 static void PerformMainTests(FILE* file)
@@ -3687,6 +3868,7 @@ void Test()
     TestMappingMultithreaded();
     TestLinearAllocator();
     ManuallyTestLinearAllocator();
+    BenchmarkLinearAllocator();
 
     TestDefragmentationSimple();
     TestDefragmentationFull();
diff --git a/src/VmaUsage.h b/src/VmaUsage.h
index e788a30..a85bf9e 100644
--- a/src/VmaUsage.h
+++ b/src/VmaUsage.h
@@ -16,16 +16,14 @@ macros if you want to configure the library and then include its header to
 include all public interface declarations. Example:
 */
 
-//#define VMA_USE_STL_CONTAINERS 1
-
 //#define VMA_HEAVY_ASSERT(expr) assert(expr)
-
+//#define VMA_USE_STL_CONTAINERS 1
 //#define VMA_DEDICATED_ALLOCATION 0
-
 //#define VMA_DEBUG_MARGIN 16
 //#define VMA_DEBUG_DETECT_CORRUPTION 1
 //#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
 //#define VMA_RECORDING_ENABLED 0
+//#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256
 
 #pragma warning(push, 4)
 #pragma warning(disable: 4127) // conditional expression is constant
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
index 3907819..e92873b 100644
--- a/src/vk_mem_alloc.h
+++ b/src/vk_mem_alloc.h
@@ -641,7 +641,7 @@ you can achieve behavior of a ring buffer / queue.
 
 ![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
 
-Pools with linear algorithm support lost allocations when used as ring buffer.
+Pools with linear algorithm support [lost allocations](@ref lost_allocations) when used as ring buffer.
 If there is not enough free space for a new allocation, but existing allocations
 from the front of the queue can become lost, they become lost and the allocation
 succeeds.
@@ -8333,7 +8333,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest(
             for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )
             {
                 const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];
-                if(VmaBlocksOnSamePage(nextSuballoc.offset, nextSuballoc.size, resultOffset, bufferImageGranularity))
+                if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
                 {
                     if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType))
                     {
@@ -8364,7 +8364,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest(
             for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )
             {
                 const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];
-                if(VmaBlocksOnSamePage(resultOffset, allocSize, prevSuballoc.offset, bufferImageGranularity))
+                if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
                 {
                     if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type))
                     {