sand/src/sand.hip
2026-05-05 10:39:09 -04:00

212 lines
5.8 KiB
Plaintext

#include "sand/sand.h"
#include "sand/type.h"
#include <hip/driver_types.h>
#ifdef TRACY_ENABLE
#include <tracy/Tracy.hpp>
#endif // TRACY_ENABLE
#include <hip/hip_runtime.h>
#include <iostream>
// Evaluates a HIP runtime call once and terminates the process when the
// result is not hipSuccess, printing the HIP error string together with the
// file and line of the failing call to std::cerr.
//
// The argument is parenthesized and the temporary uses a macro-private name
// so that an expression mentioning a caller variable called `error` is not
// shadowed by the macro's own local.
#define HIP_CHECK(condition)                                                  \
  do                                                                          \
  {                                                                           \
    const hipError_t hip_check_status_ = (condition);                         \
    if (hip_check_status_ != hipSuccess)                                      \
    {                                                                         \
      std::cerr << "An error occured: \""                                     \
                << hipGetErrorString(hip_check_status_) << "\"\t at "         \
                << __FILE__ << ":" << __LINE__ << std::endl;                  \
      std::exit(-1);                                                          \
    }                                                                         \
  } while (false)
// Integer division of `a` by `b`, rounded up by biasing the numerator with
// `b - 1` before dividing. Intended for non-negative `a` and positive `b`.
template <typename T> constexpr T ceildiv(const T& a, const T& b)
{
  const T biased_numerator = a + b - 1;
  return biased_numerator / b;
}
namespace
{
// Computes the next state of every grid cell.
//
// Launch layout: a 2D grid of 2D blocks with one thread per cell; threads
// whose (x, y) falls outside width x height return immediately, so the grid
// only has to cover the field, not match it exactly.
//
// Parameters:
//   data         two consecutive width*height buffers. `current` selects the
//                buffer that is read; the other buffer receives the result.
//   width/height field dimensions in cells.
//   conversions  table of result types, indexed by rule number.
//   metas        per-type [begin, end) range of rule numbers.
//   masks        lookup indexed as
//                [cell type * types_size * 8 + neighbour type * 8 + slot],
//                where slot 0..7 enumerates the eight neighbours row by row.
//   current      which of the two buffers holds the present state.
//   types_size   number of types, used as the stride of the mask table.
//   conversions_size / metas_size / masks_size are currently unused: no
//   table lookup is bounds-checked.
//
// For each cell the masks of all eight neighbours are AND-ed together. Bit k
// of the combined mask selects conversions[begin + k]; the lowest set bit
// inside [begin, end) wins, and conversions[begin] is used when none is set.
__global__ void
tick_kernel(sand::type::id_ty* data, unsigned int width, unsigned int height,
            const sand::type::id_ty* conversions,
            unsigned int conversions_size, const sand::rule::metadata* metas,
            unsigned int metas_size, const sand::rule::mask* masks,
            unsigned masks_size, bool current, uint16_t types_size)
{
  const unsigned int tile_x = blockIdx.x * blockDim.x + threadIdx.x;
  const unsigned int tile_y = blockIdx.y * blockDim.y + threadIdx.y;
  if (tile_x >= width || tile_y >= height)
  {
    return;
  }

  const unsigned int cell_count = width * height;
  const unsigned int read_base = current ? cell_count : 0U;
  const unsigned int write_base = current ? 0U : cell_count;
  const unsigned int cell = tile_x + tile_y * width;

  auto d = data[read_base + cell];
  auto [begin, end] = metas[d];
  // NOTE(review): -1U sets all bits only if sand::rule::mask is at most as
  // wide as unsigned int -- confirm against the type's definition.
  sand::rule::mask mask = -1U;
  const unsigned int current_type_mask_lookup = d * types_size * 8;

  int neighbor_index = 0;
  for (int dy = -1; dy <= 1; dy++)
  {
    for (int dx = -1; dx <= 1; dx++)
    {
      if (dx == 0 && dy == 0)
      {
        continue;
      }
      // Unsigned arithmetic: stepping left/up from coordinate 0 wraps to a
      // huge value, so a single `< extent` test rejects both the negative and
      // the too-large side. Coordinate 0 itself is a valid neighbour; a
      // separate `> 0` test would wrongly exclude the first row and column.
      const unsigned int neighbor_x = tile_x + dx;
      const unsigned int neighbor_y = tile_y + dy;
      // Off-grid neighbours are looked up as type 0.
      // NOTE(review): the host-side get() reports type::OFF_GRID for the same
      // situation -- confirm that 0 is the intended id here.
      int neighbor_type = 0;
      if (neighbor_x < width && neighbor_y < height)
      {
        neighbor_type = data[neighbor_x + neighbor_y * width + read_base];
      }
      mask &= masks[neighbor_index + neighbor_type * 8
                    + current_type_mask_lookup];
      neighbor_index += 1;
    }
  }

  // Default to the first rule of the range; the lowest set mask bit within
  // the range overrides it.
  auto next = conversions[begin];
  for (int bit = begin; bit < end; bit++)
  {
    if (mask & 1)
    {
      next = conversions[bit];
      break;
    }
    mask >>= 1;
  }
  data[write_base + cell] = next;
}
}
// Releases the four device allocations owned by this object: the lookup
// tables first, the grid buffer last. A failing hipFree terminates the
// process through HIP_CHECK.
sand::sand::~sand()
{
  void* const device_allocations[] = {d_conversions, d_metas, d_masks, d_data};
  for (void* allocation : device_allocations)
  {
    HIP_CHECK(hipFree(allocation));
  }
}
// Returns the type stored at cell (x, y) of the buffer selected by `current`,
// or type::OFF_GRID when the coordinate lies outside the field. Pending
// device results are copied to the host copy before the lookup.
sand::type sand::sand::get(int x, int y)
{
  sync_device_read_data();

  const bool column_in_range = !(x < 0) && !(x >= width);
  const bool row_in_range = !(y < 0) && !(y >= height);
  if (!(column_in_range && row_in_range))
  {
    return type::OFF_GRID;
  }

  // The second buffer starts width * height elements into `data`.
  const auto buffer_offset = current * width * height;
  return data[x + y * width + buffer_offset];
}
void sand::sand::set(int x, int y, type type)
{
data[x + y * width + current * width * height] = type;
write_dirty = true;
}
// Advances the simulation by one step on the device.
//
// Pending host edits are uploaded first. The kernel is then launched on the
// default stream with 32x32 threads per block and enough blocks to cover the
// field in both dimensions. Only launch errors are checked here; there is no
// explicit synchronization after the launch. Afterwards the host copy is
// flagged as stale and the two buffers swap roles.
void sand::sand::tick()
{
  ZoneScoped;
  sync_device_write_data();

  constexpr int block_size_x = 32;
  constexpr int block_size_y = 32;
  const unsigned int grid_size_x = ceildiv(width, block_size_x);
  // The vertical block count must follow the field height; deriving it from
  // the width leaves rows unprocessed whenever the field is taller than wide.
  const unsigned int grid_size_y = ceildiv(height, block_size_y);

  tick_kernel<<<dim3(grid_size_x, grid_size_y),
                dim3(block_size_x, block_size_y), 0, hipStreamDefault>>>(
      d_data, width, height, d_conversions, conversions.size(), d_metas,
      metas.size(), d_masks, masks.size(), current, types.size());
  HIP_CHECK(hipGetLastError());

  read_dirty = true;
  current = !current;
}
// Builds a simulation of width x height cells.
//
// Copies the type range and the three rule tables (conversions, metas, masks)
// into the object, fills the host grid with `initial`, and then allocates and
// uploads the device-side copies through initialize_device_state().
//
// NOTE(review): members are initialized in declaration order, not in the
// order listed here; `data` depends on the `width`/`height` constructor
// arguments only, so it is unaffected -- confirm against the header.
sand::sand::sand(type::range types, const std::vector<type>& conversions,
const std::vector<rule::metadata>& metas,
const std::vector<rule::mask>& masks, int width, int height,
type initial) :
width(width),
height(height),
types(types),
conversions(conversions),
metas(metas),
masks(masks),
// Two buffers of width * height cells each, stored back to back; `current`
// selects which one holds the present state.
data(width * height * 2, initial),
current(false),
// Starts dirty, so the first tick() uploads the host grid once more.
write_dirty(true),
read_dirty(false),
// Device pointers stay null until initialize_device_state() allocates them.
d_conversions(nullptr),
d_metas(nullptr),
d_masks(nullptr),
d_data(nullptr)
{
initialize_device_state();
}
void sand::sand::initialize_device_state()
{
HIP_CHECK(
hipMalloc(&d_conversions, conversions.size() * sizeof(type::id_ty)));
HIP_CHECK(hipMalloc(&d_metas, metas.size() * sizeof(rule::metadata)));
HIP_CHECK(hipMalloc(&d_masks, masks.size() * sizeof(rule::mask)));
HIP_CHECK(hipMalloc(&d_data, data.size() * sizeof(type::id_ty)));
HIP_CHECK(hipMemcpy(d_conversions, conversions.data(),
conversions.size() * sizeof(type::id_ty),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_metas, metas.data(),
metas.size() * sizeof(rule::metadata),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_masks, masks.data(), masks.size() * sizeof(rule::mask),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
hipMemcpyHostToDevice));
}
// Uploads the complete host grid (both buffers) to the device when host-side
// edits are pending, then clears the pending flag. Does nothing otherwise.
void sand::sand::sync_device_write_data()
{
  ZoneScoped;
  if (!write_dirty)
  {
    return;
  }

  const auto grid_bytes = data.size() * sizeof(type::id_ty);
  HIP_CHECK(hipMemcpy(d_data, data.data(), grid_bytes, hipMemcpyHostToDevice));
  write_dirty = false;
}
// Downloads the complete device grid (both buffers) into the host copy when
// the device holds newer results, then clears the stale flag. Does nothing
// otherwise.
void sand::sand::sync_device_read_data()
{
  ZoneScoped;
  if (!read_dirty)
  {
    return;
  }

  const auto grid_bytes = data.size() * sizeof(type::id_ty);
  HIP_CHECK(hipMemcpy(data.data(), d_data, grid_bytes, hipMemcpyDeviceToHost));
  read_dirty = false;
}