Improve OpenCL time calibration

by moving the host time retrieval
immediately after clWaitForEvents,
similar to the Vulkan approach.
This commit is contained in:
John Lynch 2021-01-25 19:09:23 -06:00
parent f4186d51d4
commit 9f55a5083c

View File

@ -66,13 +66,37 @@ namespace tracy {
{ {
assert(m_contextId != 255); assert(m_contextId != 255);
cl_int err = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
assert(err == CL_SUCCESS);
uint32_t dummyValue = 42;
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
assert(err == CL_SUCCESS);
cl_event writeBufferEvent;
err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent);
assert(err == CL_SUCCESS);
err = clWaitForEvents(1, &writeBufferEvent);
m_hostStartTime = Profiler::GetTime(); m_hostStartTime = Profiler::GetTime();
m_deviceStartTime = GetDeviceTimestamp(context, device);
assert(err == CL_SUCCESS);
cl_int eventStatus;
err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
assert(err == CL_SUCCESS);
assert(eventStatus == CL_COMPLETE);
err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &m_deviceStartTime, nullptr);
assert(err == CL_SUCCESS);
err = clReleaseEvent(writeBufferEvent);
assert(err == CL_SUCCESS);
err = clReleaseMemObject(dummyBuffer);
assert(err == CL_SUCCESS);
err = clReleaseCommandQueue(queue);
assert(err == CL_SUCCESS);
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext); MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, m_hostStartTime); MemWrite(&item->gpuNewContext.cpuTime, m_hostStartTime);
MemWrite(&item->gpuNewContext.gpuTime, m_hostStartTime); MemWrite(&item->gpuNewContext.gpuTime, m_deviceStartTime);
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite(&item->gpuNewContext.period, 1.0f); MemWrite(&item->gpuNewContext.period, 1.0f);
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
@ -117,7 +141,7 @@ namespace tracy {
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime); MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, TimestampOffset(eventTimeStamp)); MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp);
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail); MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
MemWrite(&item->gpuTime.context, m_contextId); MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
@ -164,41 +188,6 @@ namespace tracy {
return m_deviceStartTime; return m_deviceStartTime;
} }
tracy_force_inline int64_t TimestampOffset(int64_t deviceTimestamp) const
{
return m_hostStartTime + (deviceTimestamp - m_deviceStartTime);
}
tracy_force_inline int64_t GetDeviceTimestamp(cl_context context, cl_device_id device) const
{
cl_ulong deviceTimestamp = 0;
cl_int err = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
assert(err == CL_SUCCESS);
uint32_t dummyValue = 42;
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
assert(err == CL_SUCCESS);
cl_event writeBufferEvent;
err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_TRUE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent);
assert(err == CL_SUCCESS);
err = clWaitForEvents(1, &writeBufferEvent);
assert(err == CL_SUCCESS);
cl_int eventStatus;
err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
assert(err == CL_SUCCESS);
assert(eventStatus == CL_COMPLETE);
err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &deviceTimestamp, nullptr);
assert(err == CL_SUCCESS);
err = clReleaseEvent(writeBufferEvent);
assert(err == CL_SUCCESS);
err = clReleaseMemObject(dummyBuffer);
assert(err == CL_SUCCESS);
err = clReleaseCommandQueue(queue);
assert(err == CL_SUCCESS);
return (int64_t)deviceTimestamp;
}
unsigned int m_contextId; unsigned int m_contextId;
EventInfo m_query[QueryCount]; EventInfo m_query[QueryCount];