From d5d0a8676ef7dd731e5eaf9fea176aabc7f7bef9 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Thu, 13 Jan 2022 12:58:57 +0800 Subject: [PATCH 1/9] Update OpenCL support for better error discoverability Solely rely on `assert` make the error undiscoverable because it only works if NDEBUG is not defined. Always log with TracyMessage for Release build. --- TracyOpenCL.hpp | 89 +++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 1fd3e741..8c490e00 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -35,12 +35,28 @@ using TracyCLCtx = void*; #include #include +#include #include "Tracy.hpp" #include "client/TracyCallstack.hpp" #include "client/TracyProfiler.hpp" #include "common/TracyAlloc.hpp" +#define TRACY_CL_TO_STRING_INDIRECT(T) #T +#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T) +#define TRACY_CL_ASSERT(p) if(!(p)) { \ + TracyMessageL( "TRACY_CL_ASSERT failed on " __FILE__ ":" TRACY_CL_TO_STRING(__LINE__) ); \ + assert(false && "TRACY_CL_ASSERT failed"); \ +} +#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \ + std::ostringstream oss; \ + oss << ("TRACY_CL_CHECK_ERROR failed on " __FILE__ ":" TRACY_CL_TO_STRING(__LINE__)) \ + << ": error code " << err; \ + auto msg = oss.str(); \ + TracyMessage(msg.data(), msg.size()); \ + assert(false && "TRACY_CL_CHECK_ERROR failed"); \ +} + namespace tracy { enum class EventPhase : uint8_t @@ -66,34 +82,27 @@ namespace tracy { , m_tail(0) { int64_t tcpu, tgpu; - assert(m_contextId != 255); + TRACY_CL_ASSERT(m_contextId != 255); cl_int err = CL_SUCCESS; cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); - assert(err == CL_SUCCESS); + TRACY_CL_CHECK_ERROR(err) uint32_t dummyValue = 42; cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err); - assert(err == CL_SUCCESS); + TRACY_CL_CHECK_ERROR(err) cl_event writeBufferEvent; - err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent); - assert(err == CL_SUCCESS); - err = clWaitForEvents(1, &writeBufferEvent); + TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent)); + TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent)); tcpu = Profiler::GetTime(); - assert(err == CL_SUCCESS); cl_int eventStatus; - err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); - assert(err == CL_SUCCESS); - assert(eventStatus == CL_COMPLETE); - err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr); - assert(err == CL_SUCCESS); - err = clReleaseEvent(writeBufferEvent); - assert(err == CL_SUCCESS); - err = clReleaseMemObject(dummyBuffer); - assert(err == CL_SUCCESS); - err = clReleaseCommandQueue(queue); - assert(err == CL_SUCCESS); + TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr)); + TRACY_CL_ASSERT(eventStatus == CL_COMPLETE); + TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr)); + TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent)); + TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer)); + TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue)); auto item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuNewContext); @@ -139,23 +148,33 @@ namespace tracy { } #endif - while (m_tail != m_head) + for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount) { - EventInfo eventInfo = m_query[m_tail]; - cl_event event = eventInfo.event; + EventInfo eventInfo = GetQuery(m_tail); cl_int eventStatus; - cl_int err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); - assert(err == CL_SUCCESS); - if (eventStatus != CL_COMPLETE) return; + cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); + if (err != CL_SUCCESS) { + std::ostringstream oss; + oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping..."; + auto msg = oss.str(); + TracyMessage(msg.data(), msg.size()); + if (eventInfo.event == nullptr) { + TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check you code!"); + } + assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent"); + continue; + } + if (eventStatus != CL_COMPLETE) { + TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &eventInfo.event)); + } cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin) ? CL_PROFILING_COMMAND_START : CL_PROFILING_COMMAND_END; cl_ulong eventTimeStamp = 0; - err = clGetEventProfilingInfo(event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr); - assert(err == CL_SUCCESS); - assert(eventTimeStamp != 0); + TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr)); + TRACY_CL_ASSERT(eventTimeStamp != 0); auto item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuTime); @@ -167,11 +186,8 @@ namespace tracy { if (eventInfo.phase == EventPhase::End) { // Done with the event, so release it - err = clReleaseEvent(event); - assert(err == CL_SUCCESS); + TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event)); } - - m_tail = (m_tail + 1) % QueryCount; } } @@ -184,14 +200,14 @@ namespace tracy { { const auto id = m_head; m_head = (m_head + 1) % QueryCount; - assert(m_head != m_tail); + TRACY_CL_ASSERT(m_head != m_tail); m_query[id] = eventInfo; return id; } tracy_force_inline EventInfo& GetQuery(unsigned int id) { - assert(id < QueryCount); + TRACY_CL_ASSERT(id < QueryCount); return m_query[id]; } @@ -200,8 +216,8 @@ namespace tracy { unsigned int m_contextId; EventInfo m_query[QueryCount]; - unsigned int m_head; - unsigned int m_tail; + unsigned int m_head; // index at which a new event should be inserted + unsigned int m_tail; // oldest event }; @@ -259,8 +275,7 @@ namespace tracy { { if (!m_active) return; m_event = event; - cl_int err = clRetainEvent(m_event); - assert(err == CL_SUCCESS); + TRACY_CL_CHECK_ERROR(clRetainEvent(m_event)); m_ctx->GetQuery(m_beginQueryId).event = m_event; } From f6894c8d6c2938a556c1e3c58bdcaa068e4d5387 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Thu, 13 Jan 2022 12:58:57 +0800 Subject: [PATCH 2/9] Make OpenCL example async --- examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp index b3ee0e44..36d4a12f 100644 --- a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp +++ b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp @@ -123,7 +123,7 @@ int main() ZoneScopedN("Write Buffer A"); TracyCLZoneS(tracyCLCtx, "Write BufferA", 5); - CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_TRUE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent)); + CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_FALSE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent)); TracyCLZoneSetEvent(writeBufferAEvent); } @@ -131,13 +131,13 @@ int main() ZoneScopedN("Write Buffer B"); TracyCLZone(tracyCLCtx, "Write BufferB"); - CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_TRUE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent)); + CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_FALSE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent)); TracyCLZoneSetEvent(writeBufferBEvent); } } - for (int i = 0; i < 10; ++i) + for (int i = 0; i < 100; ++i) { int n_value = static_cast(N); ZoneScopedN("VectorAdd Kernel Launch"); @@ -150,15 +150,8 @@ int main() cl_event vectorAddKernelEvent; CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent)); - - CL_ASSERT(clWaitForEvents(1, &vectorAddKernelEvent)); - TracyCLZoneSetEvent(vectorAddKernelEvent); - - cl_ulong kernelStartTime, kernelEndTime; - CL_ASSERT(clGetEventProfilingInfo(vectorAddKernelEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernelStartTime, nullptr)); - CL_ASSERT(clGetEventProfilingInfo(vectorAddKernelEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernelEndTime, nullptr)); - std::cout << "VectorAdd Kernel Elapsed: " << ((kernelEndTime - kernelStartTime) / 1000) << " us" << std::endl; + std::cout << "VectorAdd Kernel Enqueued" << std::endl; } { From e28c562a20a46c9c60533faaf39a3b9ab3117179 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 18 Jan 2022 13:51:27 +0800 Subject: [PATCH 3/9] add log and simple statistics for async kernel --- examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp | 26 +++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp index 36d4a12f..71184e00 100644 --- a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp +++ b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -137,7 +138,10 @@ int main() } } - for (int i = 0; i < 100; ++i) + const int numLaunchs = 100; + std::vector kernelLaunchEvts; + kernelLaunchEvts.reserve(numLaunchs); + for (int i = 0; i < numLaunchs; ++i) { int n_value = static_cast(N); ZoneScopedN("VectorAdd Kernel Launch"); @@ -151,6 +155,8 @@ int main() cl_event vectorAddKernelEvent; CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent)); TracyCLZoneSetEvent(vectorAddKernelEvent); + CL_ASSERT(clRetainEvent(vectorAddKernelEvent)); + kernelLaunchEvts.push_back(vectorAddKernelEvent); std::cout << "VectorAdd Kernel Enqueued" << std::endl; } @@ -164,6 +170,24 @@ int main() } CL_ASSERT(clFinish(commandQueue)); + std::vector durations(kernelLaunchEvts.size()); + for (int i=0; i(durations[i]) << "us" << std::endl; + }; + float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size(); + float stddev2 = std::accumulate(durations.cbegin(), durations.cend(), 0.0f, [avg](const float& acc, const float& v) { + auto d = v - avg; + return acc + d*d; + }) / (durations.size() - 1.0f); + std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl; + TracyCLCollect(tracyCLCtx); From f8d231bbc9922ff97ab57b0b358f279917a48f12 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 18 Jan 2022 14:01:20 +0800 Subject: [PATCH 4/9] stop using macro in TRACY_CL_CHECK_ERROR --- TracyOpenCL.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 8c490e00..e4b70ca6 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -48,13 +48,13 @@ using TracyCLCtx = void*; TracyMessageL( "TRACY_CL_ASSERT failed on " __FILE__ ":" TRACY_CL_TO_STRING(__LINE__) ); \ assert(false && "TRACY_CL_ASSERT failed"); \ } -#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \ - std::ostringstream oss; \ - oss << ("TRACY_CL_CHECK_ERROR failed on " __FILE__ ":" TRACY_CL_TO_STRING(__LINE__)) \ - << ": error code " << err; \ - auto msg = oss.str(); \ - TracyMessage(msg.data(), msg.size()); \ - assert(false && "TRACY_CL_CHECK_ERROR failed"); \ +#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \ + std::ostringstream oss; \ + oss << "TRACY_CL_CHECK_ERROR failed on " << __FILE__ << ":" << __LINE__ \ + << ": error code " << err; \ + auto msg = oss.str(); \ + TracyMessage(msg.data(), msg.size()); \ + assert(false && "TRACY_CL_CHECK_ERROR failed"); \ } namespace tracy { From d660425e00ca4cae8bbf43279dd2f87fc8947cdc Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 25 Jan 2022 11:44:23 +0800 Subject: [PATCH 5/9] mute warnings --- examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp index 71184e00..3a59f2d8 100644 --- a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp +++ b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp @@ -105,8 +105,8 @@ int main() hostB.resize(N); hostC.resize(N); - std::iota(std::begin(hostA), std::end(hostA), 0); - std::iota(std::begin(hostB), std::end(hostB), 0); + std::iota(std::begin(hostA), std::end(hostA), 0.0f); + std::iota(std::begin(hostB), std::end(hostB), 0.0f); } { @@ -178,7 +178,7 @@ int main() CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr)); CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr)); CL_ASSERT(clReleaseEvent(evt)); - durations[i] = (end - start) * 0.001; + durations[i] = (end - start) * 0.001f; std::cout << "VectorAdd Kernel " << i << " tooks " << static_cast(durations[i]) << "us" << std::endl; }; float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size(); From 21b748ac846eba0466db3eec8a08d51d194bedfd Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 25 Jan 2022 11:54:35 +0800 Subject: [PATCH 6/9] add back non-blocking collect --- TracyOpenCL.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index e4b70ca6..4f029686 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -21,6 +21,7 @@ #define TracyCLZoneSetEvent(e) #define TracyCLCollect(c) +#define TracyCLBlockingCollect(c) namespace tracy { @@ -135,7 +136,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - void Collect() + void Collect(bool blocking) { ZoneScopedC(Color::Red4); @@ -165,6 +166,8 @@ namespace tracy { continue; } if (eventStatus != CL_COMPLETE) { + if (!blocking) + return; TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &eventInfo.event)); } @@ -346,7 +349,8 @@ using TracyCLCtx = tracy::OpenCLCtx*; #define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event) #define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event) -#define TracyCLCollect(ctx) ctx->Collect() +#define TracyCLCollect(ctx) ctx->Collect(/*blocking=*/false) +#define TracyCLBlockingCollect(ctx) ctx->Collect(/*blocking=*/true) #endif From 9b60c4970e90f3d5de8f19c5e60e13d20c070be8 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 25 Jan 2022 12:13:04 +0800 Subject: [PATCH 7/9] better example --- examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp | 46 ++++++++++++-------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp index 3a59f2d8..ce5adece 100644 --- a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp +++ b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp @@ -138,26 +138,38 @@ int main() } } - const int numLaunchs = 100; + cl_int clN = static_cast(N); + const int numFrames = 10; + const int launchsPerFrame = 10; + constexpr int numLaunchs = numFrames * launchsPerFrame; std::vector kernelLaunchEvts; kernelLaunchEvts.reserve(numLaunchs); - for (int i = 0; i < numLaunchs; ++i) + for (int i = 0; i < numFrames; ++i) { - int n_value = static_cast(N); - ZoneScopedN("VectorAdd Kernel Launch"); - TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4); + FrameMark; + for (int j = 0; j < launchsPerFrame; ++j) { + ZoneScopedN("VectorAdd Kernel Launch"); + TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC)); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA)); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB)); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(int), &n_value)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN)); - cl_event vectorAddKernelEvent; - CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent)); - TracyCLZoneSetEvent(vectorAddKernelEvent); - CL_ASSERT(clRetainEvent(vectorAddKernelEvent)); - kernelLaunchEvts.push_back(vectorAddKernelEvent); - std::cout << "VectorAdd Kernel Enqueued" << std::endl; + cl_event vectorAddKernelEvent; + CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent)); + TracyCLZoneSetEvent(vectorAddKernelEvent); + CL_ASSERT(clRetainEvent(vectorAddKernelEvent)); + kernelLaunchEvts.push_back(vectorAddKernelEvent); + std::cout << "VectorAdd Kernel Enqueued" << std::endl; + } + { + // Wait frame events to be finished + ZoneScopedN("clFinish"); + CL_ASSERT(clFinish(commandQueue)); + } + // You should collect on each 'frame' ends, so that streaming can be achieved. + TracyCLCollect(tracyCLCtx); } { @@ -188,8 +200,8 @@ int main() }) / (durations.size() - 1.0f); std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl; - - TracyCLCollect(tracyCLCtx); + // Use blocking collect will ensure all queued events is finished + TracyCLBlockingCollect(tracyCLCtx); { ZoneScopedN("Checking results"); From f6769ecb910f05fd1e6088a13a207afa0f8cfe28 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Tue, 25 Jan 2022 12:35:09 +0800 Subject: [PATCH 8/9] better error discoverability if queue is not created with CL_QUEUE_PROFILING_ENABLE --- TracyOpenCL.hpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 4f029686..fb60431c 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -154,18 +154,20 @@ namespace tracy { EventInfo eventInfo = GetQuery(m_tail); cl_int eventStatus; cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); - if (err != CL_SUCCESS) { + if (err != CL_SUCCESS) + { std::ostringstream oss; oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping..."; auto msg = oss.str(); TracyMessage(msg.data(), msg.size()); if (eventInfo.event == nullptr) { - TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check you code!"); + TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!"); } assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent"); continue; } - if (eventStatus != CL_COMPLETE) { + if (eventStatus != CL_COMPLETE) + { if (!blocking) return; TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &eventInfo.event)); @@ -176,7 +178,15 @@ namespace tracy { : CL_PROFILING_COMMAND_END; cl_ulong eventTimeStamp = 0; - TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr)); + err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr); + if (err == CL_PROFILING_INFO_NOT_AVAILABLE) + { + TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!"); + assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag"); + } + else + TRACY_CL_CHECK_ERROR(err); + TRACY_CL_ASSERT(eventTimeStamp != 0); auto item = Profiler::QueueSerial(); From c9fb07ffe57372f982dccd368a649c96effc880b Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Fri, 28 Jan 2022 11:55:42 +0800 Subject: [PATCH 9/9] remove blocking collect --- TracyOpenCL.hpp | 13 +++---------- examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp | 4 ++-- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index fb60431c..d477158d 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -21,7 +21,6 @@ #define TracyCLZoneSetEvent(e) #define TracyCLCollect(c) -#define TracyCLBlockingCollect(c) namespace tracy { @@ -136,7 +135,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - void Collect(bool blocking) + void Collect() { ZoneScopedC(Color::Red4); @@ -166,12 +165,7 @@ namespace tracy { assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent"); continue; } - if (eventStatus != CL_COMPLETE) - { - if (!blocking) - return; - TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &eventInfo.event)); - } + if (eventStatus != CL_COMPLETE) return; cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin) ? CL_PROFILING_COMMAND_START @@ -359,8 +353,7 @@ using TracyCLCtx = tracy::OpenCLCtx*; #define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event) #define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event) -#define TracyCLCollect(ctx) ctx->Collect(/*blocking=*/false) -#define TracyCLBlockingCollect(ctx) ctx->Collect(/*blocking=*/true) +#define TracyCLCollect(ctx) ctx->Collect() #endif diff --git a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp index ce5adece..a2560eae 100644 --- a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp +++ b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp @@ -200,8 +200,8 @@ int main() }) / (durations.size() - 1.0f); std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl; - // Use blocking collect will ensure all queued events is finished - TracyCLBlockingCollect(tracyCLCtx); + // User should ensure all events are finished, in this case, collect after the clFinish will do the trick. + TracyCLCollect(tracyCLCtx); { ZoneScopedN("Checking results");