mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 22:44:34 +00:00
better example
This commit is contained in:
parent
21b748ac84
commit
9b60c4970e
@ -138,19 +138,23 @@ int main()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int numLaunchs = 100;
|
cl_int clN = static_cast<cl_int>(N);
|
||||||
|
const int numFrames = 10;
|
||||||
|
const int launchsPerFrame = 10;
|
||||||
|
constexpr int numLaunchs = numFrames * launchsPerFrame;
|
||||||
std::vector<cl_event> kernelLaunchEvts;
|
std::vector<cl_event> kernelLaunchEvts;
|
||||||
kernelLaunchEvts.reserve(numLaunchs);
|
kernelLaunchEvts.reserve(numLaunchs);
|
||||||
for (int i = 0; i < numLaunchs; ++i)
|
for (int i = 0; i < numFrames; ++i)
|
||||||
{
|
{
|
||||||
int n_value = static_cast<int>(N);
|
FrameMark;
|
||||||
|
for (int j = 0; j < launchsPerFrame; ++j) {
|
||||||
ZoneScopedN("VectorAdd Kernel Launch");
|
ZoneScopedN("VectorAdd Kernel Launch");
|
||||||
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
||||||
|
|
||||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
||||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
||||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
||||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(int), &n_value));
|
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN));
|
||||||
|
|
||||||
cl_event vectorAddKernelEvent;
|
cl_event vectorAddKernelEvent;
|
||||||
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
||||||
@ -159,6 +163,14 @@ int main()
|
|||||||
kernelLaunchEvts.push_back(vectorAddKernelEvent);
|
kernelLaunchEvts.push_back(vectorAddKernelEvent);
|
||||||
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
|
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
// Wait for this frame's events to finish
|
||||||
|
ZoneScopedN("clFinish");
|
||||||
|
CL_ASSERT(clFinish(commandQueue));
|
||||||
|
}
|
||||||
|
// Collect at the end of each 'frame' so that streaming can be achieved.
|
||||||
|
TracyCLCollect(tracyCLCtx);
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
ZoneScopedN("Device to Host Memory Copy");
|
ZoneScopedN("Device to Host Memory Copy");
|
||||||
@ -188,8 +200,8 @@ int main()
|
|||||||
}) / (durations.size() - 1.0f);
|
}) / (durations.size() - 1.0f);
|
||||||
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
|
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
|
||||||
|
|
||||||
|
// Using a blocking collect ensures that all queued events have finished
|
||||||
TracyCLCollect(tracyCLCtx);
|
TracyCLBlockingCollect(tracyCLCtx);
|
||||||
|
|
||||||
{
|
{
|
||||||
ZoneScopedN("Checking results");
|
ZoneScopedN("Checking results");
|
||||||
|
Loading…
Reference in New Issue
Block a user