Move tick to GPU

This commit is contained in:
shylie 2026-05-05 10:39:09 -04:00
parent 24246c86a1
commit af3d33b1f9
Signed by: shylie
GPG Key ID: 20C8D70580687D9D
7 changed files with 232 additions and 103 deletions

View File

@ -1,5 +1,10 @@
Documentation: Documentation:
CommentFormat: Doxygen CommentFormat: Doxygen
CompileFlags:
Remove:
- -forward-unknown-to-host-compiler*
- --generate-code=*
- -Xcompiler=*
If: If:
PathMatch: .*\.hip PathMatch: .*\.hip

View File

@ -20,7 +20,6 @@ add_library(sand STATIC
src/type_range.cpp src/type_range.cpp
src/rule.cpp src/rule.cpp
src/rule_builder.cpp src/rule_builder.cpp
src/sand.cpp
src/sand.hip src/sand.hip
) )
@ -40,10 +39,10 @@ if(EXISTS third/tracy/CMakeLists.txt)
add_subdirectory(third/tracy) add_subdirectory(third/tracy)
target_link_libraries(sand PUBLIC Tracy::TracyClient) target_link_libraries(sand PUBLIC Tracy::TracyClient)
if(NOT TRACY_ENABLE) if(NOT TRACY_ENABLE)
target_compile_definitions(sand PUBLIC FrameMark ZoneScoped) target_compile_definitions(sand PUBLIC FrameMark= ZoneScoped=)
endif() endif()
else() else()
target_compile_definitions(sand PUBLIC FrameMark ZoneScoped) target_compile_definitions(sand PUBLIC FrameMark= ZoneScoped=)
endif() endif()
find_package(SDL3 CONFIG REQUIRED) find_package(SDL3 CONFIG REQUIRED)

View File

@ -15,7 +15,14 @@ namespace sand
class sand class sand
{ {
public: public:
type get(int x, int y) const; sand(const sand&) = delete;
sand(sand&&) = default;
sand& operator=(const sand&) = delete;
~sand();
type get(int x, int y);
void set(int x, int y, type type); void set(int x, int y, type type);
void tick(); void tick();
@ -26,14 +33,25 @@ private:
const std::vector<rule::mask>& masks, int width, int height, const std::vector<rule::mask>& masks, int width, int height,
type initial); type initial);
void initialize_device_state();
void sync_device_write_data();
void sync_device_read_data();
int width;
int height;
type::range types; type::range types;
std::vector<type> conversions; std::vector<type> conversions;
std::vector<rule::metadata> metas; std::vector<rule::metadata> metas;
std::vector<rule::mask> masks; std::vector<rule::mask> masks;
int width; std::vector<type::id_ty> data;
int height;
std::vector<type> data;
bool current; bool current;
bool write_dirty;
bool read_dirty;
type::id_ty* d_conversions;
rule::metadata* d_metas;
rule::mask* d_masks;
type::id_ty* d_data;
friend class rule::builder; friend class rule::builder;
}; };

View File

@ -13,6 +13,7 @@ class type
public: public:
class builder; class builder;
class range; class range;
using id_ty = uint16_t;
bool operator==(const type&) const; bool operator==(const type&) const;
@ -21,8 +22,6 @@ public:
static constexpr unsigned int MAX_TYPES = UINT16_MAX; static constexpr unsigned int MAX_TYPES = UINT16_MAX;
private: private:
using id_ty = uint16_t;
type(id_ty id); type(id_ty id);
operator id_ty() const; operator id_ty() const;

View File

@ -11,8 +11,10 @@
#include <tracy/Tracy.hpp> #include <tracy/Tracy.hpp>
#endif // TRACY_ENABLE #endif // TRACY_ENABLE
constexpr uint16_t WIDTH = 64; constexpr uint16_t WIDTH = 256;
constexpr uint16_t HEIGHT = 64; constexpr uint16_t HEIGHT = 256;
constexpr float UPDATE_TIME = 0;
constexpr int UPDATE_TICKS = 1;
constexpr int WINDOW_WIDTH = 1024; constexpr int WINDOW_WIDTH = 1024;
constexpr int WINDOW_HEIGHT = 1024; constexpr int WINDOW_HEIGHT = 1024;
@ -135,11 +137,14 @@ int main(int argc, char** argv)
float x, y; float x, y;
int state; int state;
if (time > 0)
{
time = 0;
if (time > UPDATE_TIME)
{
for (int i = 0; i < UPDATE_TICKS; i++)
{
s.tick(); s.tick();
}
time = 0;
int count = 0; int count = 0;
for (int i = 0; i < WIDTH; i++) for (int i = 0; i < WIDTH; i++)

View File

@ -1,85 +0,0 @@
#include "sand/sand.h"
#include "sand/type.h"
#ifdef TRACY_ENABLE
#include <tracy/Tracy.hpp>
#endif // TRACY_ENABLE
sand::type sand::sand::get(int x, int y) const
{
if (x < 0 || x >= width || y < 0 || y >= height)
{
return type::OFF_GRID;
}
return data[x + y * width + current * width * height];
}
void sand::sand::set(int x, int y, type type)
{
data[x + y * width + current * width * height] = type;
}
void sand::sand::tick()
{
ZoneScoped;
#pragma omp parallel for
for (int tile_index = 0; tile_index < width * height; tile_index++)
{
const int x = tile_index % width;
const int y = tile_index / width;
auto [begin, end] = metas[get(x, y)];
uint32_t mask = -1U;
int neighbor_index = 0;
for (int dy = -1; dy <= 1; dy++)
{
for (int dx = -1; dx <= 1; dx++)
{
if (dx == 0 && dy == 0)
{
continue;
}
mask &= masks[neighbor_index + get(x + dx, y + dy) * 8
+ get(x, y) * types.size() * 8];
neighbor_index += 1;
}
}
bool found = false;
for (int bit = begin; bit < end; bit++)
{
if (mask & 1)
{
data[x + y * width + !current * width * height] = conversions[bit];
found = true;
break;
}
mask >>= 1;
}
if (!found)
{
data[x + y * width + !current * width * height] = conversions[begin];
}
}
current = !current;
}
sand::sand::sand(type::range types, const std::vector<type>& conversions,
const std::vector<rule::metadata>& metas,
const std::vector<rule::mask>& masks, int width, int height,
type initial) :
types(types),
conversions(conversions),
metas(metas),
masks(masks),
width(width),
height(height),
data(width * height * 2, initial),
current(false)
{
}

View File

@ -1,9 +1,14 @@
#include <hip/amd_detail/amd_hip_runtime.h> #include "sand/sand.h"
#include "sand/type.h"
#include <hip/driver_types.h> #include <hip/driver_types.h>
#ifdef TRACY_ENABLE
#include <tracy/Tracy.hpp>
#endif // TRACY_ENABLE
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <iostream> #include <iostream>
#include <numeric>
#include <vector>
#define HIP_CHECK(condition) \ #define HIP_CHECK(condition) \
do \ do \
@ -21,3 +26,186 @@ template <typename T> constexpr T ceildiv(const T& a, const T& b)
{ {
return (a + b - 1) / b; return (a + b - 1) / b;
} }
namespace
{
__global__ void
tick_kernel(sand::type::id_ty* data, unsigned int width, unsigned int height,
sand::type::id_ty* conversions, unsigned int conversions_size,
sand::rule::metadata* metas, unsigned int metas_size,
sand::rule::mask* masks, unsigned masks_size, bool current,
uint16_t types_size)
{
const unsigned int tile_x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int tile_y = blockIdx.y * blockDim.y + threadIdx.y;
if (tile_x >= width || tile_y >= height)
{
return;
}
auto d = data[tile_x + tile_y * width + current * width * height];
auto [begin, end] = metas[d];
sand::rule::mask mask = -1U;
const unsigned int current_type_mask_lookup = d * types_size * 8;
int neighbor_index = 0;
for (int dy = -1; dy <= 1; dy++)
{
for (int dx = -1; dx <= 1; dx++)
{
if (dx == 0 && dy == 0)
{
continue;
}
auto neighbor_type = 0;
if (tile_x + dx > 0 && tile_x + dx < width && tile_y + dy > 0
&& tile_y + dy < height)
{
neighbor_type = data[(tile_x + dx) + (tile_y + dy) * width
+ current * width * height];
}
const auto found_mask
= masks[neighbor_index + neighbor_type * 8 + current_type_mask_lookup];
mask &= found_mask;
neighbor_index += 1;
}
}
bool found = false;
for (int bit = begin; bit < end; bit++)
{
if (mask & 1)
{
data[tile_x + tile_y * width + !current * width * height]
= conversions[bit];
found = true;
break;
}
mask >>= 1;
}
if (!found)
{
data[tile_x + tile_y * width + !current * width * height]
= conversions[begin];
}
}
}
sand::sand::~sand()
{
HIP_CHECK(hipFree(d_conversions));
HIP_CHECK(hipFree(d_metas));
HIP_CHECK(hipFree(d_masks));
HIP_CHECK(hipFree(d_data));
}
sand::type sand::sand::get(int x, int y)
{
sync_device_read_data();
if (x < 0 || x >= width || y < 0 || y >= height)
{
return type::OFF_GRID;
}
return data[x + y * width + current * width * height];
}
void sand::sand::set(int x, int y, type type)
{
data[x + y * width + current * width * height] = type;
write_dirty = true;
}
void sand::sand::tick()
{
ZoneScoped;
sync_device_write_data();
constexpr int block_size_x = 32;
constexpr int block_size_y = 32;
const unsigned int grid_size_x = ceildiv(width, block_size_x);
const unsigned int grid_size_y = ceildiv(width, block_size_y);
tick_kernel<<<dim3(grid_size_x, grid_size_y),
dim3(block_size_x, block_size_y), 0, hipStreamDefault>>>(
d_data, width, height, d_conversions, conversions.size(), d_metas,
metas.size(), d_masks, masks.size(), current, types.size());
HIP_CHECK(hipGetLastError());
read_dirty = true;
current = !current;
}
sand::sand::sand(type::range types, const std::vector<type>& conversions,
const std::vector<rule::metadata>& metas,
const std::vector<rule::mask>& masks, int width, int height,
type initial) :
width(width),
height(height),
types(types),
conversions(conversions),
metas(metas),
masks(masks),
data(width * height * 2, initial),
current(false),
write_dirty(true),
read_dirty(false),
d_conversions(nullptr),
d_metas(nullptr),
d_masks(nullptr),
d_data(nullptr)
{
initialize_device_state();
}
void sand::sand::initialize_device_state()
{
HIP_CHECK(
hipMalloc(&d_conversions, conversions.size() * sizeof(type::id_ty)));
HIP_CHECK(hipMalloc(&d_metas, metas.size() * sizeof(rule::metadata)));
HIP_CHECK(hipMalloc(&d_masks, masks.size() * sizeof(rule::mask)));
HIP_CHECK(hipMalloc(&d_data, data.size() * sizeof(type::id_ty)));
HIP_CHECK(hipMemcpy(d_conversions, conversions.data(),
conversions.size() * sizeof(type::id_ty),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_metas, metas.data(),
metas.size() * sizeof(rule::metadata),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_masks, masks.data(), masks.size() * sizeof(rule::mask),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
hipMemcpyHostToDevice));
}
void sand::sand::sync_device_write_data()
{
ZoneScoped;
if (write_dirty)
{
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
hipMemcpyHostToDevice));
write_dirty = false;
}
}
void sand::sand::sync_device_read_data()
{
ZoneScoped;
if (read_dirty)
{
HIP_CHECK(hipMemcpy(data.data(), d_data, data.size() * sizeof(type::id_ty),
hipMemcpyDeviceToHost));
read_dirty = false;
}
}