Adds two "launch kernel" tests for liboffload: one testing that global memory works and persists between different kernels, and one verifying that `[[gnu::constructor]]` works correctly. Since we now have tests that contain multiple kernels in the same binary, the test framework has been updated slightly.
26 lines
575 B
C
26 lines
575 B
C
#include <gpuintrin.h>
|
|
#include <stdint.h>
|
|
|
|
// File-scope array of 64 32-bit values. The prioritized constructors in this
// file (ctora, ctorb, ctorc) write to it, and the global_ctor kernel updates
// and reads back one element per thread index.
uint32_t global[64];
|
|
|
|
[[gnu::constructor(202)]] void ctorc() {
|
|
for (unsigned I = 0; I < 64; I++)
|
|
global[I] += 20;
|
|
}
|
|
|
|
[[gnu::constructor(200)]] void ctora() {
|
|
for (unsigned I = 0; I < 64; I++)
|
|
global[I] = 40;
|
|
}
|
|
|
|
[[gnu::constructor(201)]] void ctorb() {
|
|
for (unsigned I = 0; I < 64; I++)
|
|
global[I] *= 2;
|
|
}
|
|
|
|
__gpu_kernel void global_ctor(uint32_t *out) {
|
|
global[__gpu_thread_id(0)] += __gpu_thread_id(0);
|
|
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =
|
|
global[__gpu_thread_id(0)];
|
|
}
|