212 lines
5.8 KiB
Plaintext
212 lines
5.8 KiB
Plaintext
#include "sand/sand.h"
|
|
#include "sand/type.h"
|
|
|
|
#include <hip/driver_types.h>
|
|
|
|
#ifdef TRACY_ENABLE
|
|
#include <tracy/Tracy.hpp>
|
|
#endif // TRACY_ENABLE
|
|
|
|
#include <hip/hip_runtime.h>
|
|
#include <iostream>
|
|
|
|
/// Evaluates a HIP runtime call exactly once and terminates the process if the
/// result is not `hipSuccess`.
///
/// On failure the HIP error string, source file and line are written to
/// `std::cerr` and the process exits with status -1.
///
/// The argument is parenthesized and the temporary uses a reserved-looking
/// name so that call sites such as `HIP_CHECK(error)` do not end up
/// initializing the temporary from itself.
#define HIP_CHECK(condition)                                                   \
  do                                                                           \
  {                                                                            \
    const hipError_t hip_check_status_ = (condition);                          \
    if (hip_check_status_ != hipSuccess)                                       \
    {                                                                          \
      std::cerr << "An error occurred: \""                                     \
                << hipGetErrorString(hip_check_status_) << "\"\t at "          \
                << __FILE__ << ":" << __LINE__ << std::endl;                   \
      std::exit(-1);                                                           \
    }                                                                          \
  } while (false)
|
|
|
|
/// Integer division of `a` by `b`, rounded up.
///
/// Computed as `(a + b - 1) / b`, so the usual caveats apply: the result is
/// only a ceiling for non-negative `a` and positive `b`, and `a + b - 1` must
/// not overflow `T`.
template <typename T> constexpr T ceildiv(const T& a, const T& b)
{
  const T padded_numerator = a + b - 1;
  return padded_numerator / b;
}
|
|
|
|
namespace
|
|
{
|
|
|
|
__global__ void
|
|
tick_kernel(sand::type::id_ty* data, unsigned int width, unsigned int height,
|
|
sand::type::id_ty* conversions, unsigned int conversions_size,
|
|
sand::rule::metadata* metas, unsigned int metas_size,
|
|
sand::rule::mask* masks, unsigned masks_size, bool current,
|
|
uint16_t types_size)
|
|
{
|
|
const unsigned int tile_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
const unsigned int tile_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
|
|
if (tile_x >= width || tile_y >= height)
|
|
{
|
|
return;
|
|
}
|
|
|
|
auto d = data[tile_x + tile_y * width + current * width * height];
|
|
auto [begin, end] = metas[d];
|
|
sand::rule::mask mask = -1U;
|
|
|
|
const unsigned int current_type_mask_lookup = d * types_size * 8;
|
|
|
|
int neighbor_index = 0;
|
|
for (int dy = -1; dy <= 1; dy++)
|
|
{
|
|
for (int dx = -1; dx <= 1; dx++)
|
|
{
|
|
if (dx == 0 && dy == 0)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
auto neighbor_type = 0;
|
|
if (tile_x + dx > 0 && tile_x + dx < width && tile_y + dy > 0
|
|
&& tile_y + dy < height)
|
|
{
|
|
neighbor_type = data[(tile_x + dx) + (tile_y + dy) * width
|
|
+ current * width * height];
|
|
}
|
|
|
|
const auto found_mask
|
|
= masks[neighbor_index + neighbor_type * 8 + current_type_mask_lookup];
|
|
mask &= found_mask;
|
|
|
|
neighbor_index += 1;
|
|
}
|
|
}
|
|
|
|
bool found = false;
|
|
for (int bit = begin; bit < end; bit++)
|
|
{
|
|
if (mask & 1)
|
|
{
|
|
data[tile_x + tile_y * width + !current * width * height]
|
|
= conversions[bit];
|
|
found = true;
|
|
break;
|
|
}
|
|
mask >>= 1;
|
|
}
|
|
if (!found)
|
|
{
|
|
data[tile_x + tile_y * width + !current * width * height]
|
|
= conversions[begin];
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/// Releases the four device buffers with `hipFree`, in the order
/// conversions, metas, masks, data. A failing free terminates the process
/// through `HIP_CHECK`.
sand::sand::~sand()
{
  void* const device_buffers[] = {d_conversions, d_metas, d_masks, d_data};

  for (void* const buffer : device_buffers)
  {
    HIP_CHECK(hipFree(buffer));
  }
}
|
|
|
|
/// Returns the type stored at cell (`x`, `y`) of the current plane.
///
/// Pending device results are copied back to the host first (this happens
/// even when the coordinates turn out to be outside the grid). Coordinates
/// outside `[0, width) x [0, height)` yield `type::OFF_GRID`.
sand::type sand::sand::get(int x, int y)
{
  sync_device_read_data();

  const bool column_ok = !(x < 0 || x >= width);
  const bool row_ok = !(y < 0 || y >= height);
  if (!(column_ok && row_ok))
  {
    return type::OFF_GRID;
  }

  // `data` stores two planes back to back; `current` selects which one.
  const auto cell_index = x + y * width + current * width * height;
  return data[cell_index];
}
|
|
|
|
void sand::sand::set(int x, int y, type type)
|
|
{
|
|
data[x + y * width + current * width * height] = type;
|
|
write_dirty = true;
|
|
}
|
|
|
|
void sand::sand::tick()
|
|
{
|
|
ZoneScoped;
|
|
|
|
sync_device_write_data();
|
|
|
|
constexpr int block_size_x = 32;
|
|
constexpr int block_size_y = 32;
|
|
const unsigned int grid_size_x = ceildiv(width, block_size_x);
|
|
const unsigned int grid_size_y = ceildiv(width, block_size_y);
|
|
|
|
tick_kernel<<<dim3(grid_size_x, grid_size_y),
|
|
dim3(block_size_x, block_size_y), 0, hipStreamDefault>>>(
|
|
d_data, width, height, d_conversions, conversions.size(), d_metas,
|
|
metas.size(), d_masks, masks.size(), current, types.size());
|
|
|
|
HIP_CHECK(hipGetLastError());
|
|
|
|
read_dirty = true;
|
|
current = !current;
|
|
}
|
|
|
|
sand::sand::sand(type::range types, const std::vector<type>& conversions,
|
|
const std::vector<rule::metadata>& metas,
|
|
const std::vector<rule::mask>& masks, int width, int height,
|
|
type initial) :
|
|
width(width),
|
|
height(height),
|
|
types(types),
|
|
conversions(conversions),
|
|
metas(metas),
|
|
masks(masks),
|
|
data(width * height * 2, initial),
|
|
current(false),
|
|
write_dirty(true),
|
|
read_dirty(false),
|
|
d_conversions(nullptr),
|
|
d_metas(nullptr),
|
|
d_masks(nullptr),
|
|
d_data(nullptr)
|
|
{
|
|
initialize_device_state();
|
|
}
|
|
|
|
void sand::sand::initialize_device_state()
|
|
{
|
|
HIP_CHECK(
|
|
hipMalloc(&d_conversions, conversions.size() * sizeof(type::id_ty)));
|
|
HIP_CHECK(hipMalloc(&d_metas, metas.size() * sizeof(rule::metadata)));
|
|
HIP_CHECK(hipMalloc(&d_masks, masks.size() * sizeof(rule::mask)));
|
|
HIP_CHECK(hipMalloc(&d_data, data.size() * sizeof(type::id_ty)));
|
|
|
|
HIP_CHECK(hipMemcpy(d_conversions, conversions.data(),
|
|
conversions.size() * sizeof(type::id_ty),
|
|
hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_metas, metas.data(),
|
|
metas.size() * sizeof(rule::metadata),
|
|
hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_masks, masks.data(), masks.size() * sizeof(rule::mask),
|
|
hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
|
|
hipMemcpyHostToDevice));
|
|
}
|
|
|
|
void sand::sand::sync_device_write_data()
|
|
{
|
|
ZoneScoped;
|
|
|
|
if (write_dirty)
|
|
{
|
|
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
|
|
hipMemcpyHostToDevice));
|
|
write_dirty = false;
|
|
}
|
|
}
|
|
|
|
void sand::sand::sync_device_read_data()
|
|
{
|
|
ZoneScoped;
|
|
|
|
if (read_dirty)
|
|
{
|
|
HIP_CHECK(hipMemcpy(data.data(), d_data, data.size() * sizeof(type::id_ty),
|
|
hipMemcpyDeviceToHost));
|
|
read_dirty = false;
|
|
}
|
|
}
|