#include "sand/sand.h"
#include "sand/type.h"

// NOTE(review): the names of the angle-bracket headers were missing from the
// reviewed source; the ones below are inferred from the symbols this file
// uses (hip*, ZoneScoped, std::cerr, std::exit, std::vector, uint16_t).
// Confirm against the build.
#include <hip/hip_runtime.h>

#ifdef TRACY_ENABLE
#if __has_include(<tracy/Tracy.hpp>)
#include <tracy/Tracy.hpp>
#else
#include <Tracy.hpp>
#endif
#endif // TRACY_ENABLE

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <vector>

// tick() and the sync helpers use ZoneScoped unconditionally; make it a no-op
// when no profiler header has defined it so the file still builds.
#ifndef ZoneScoped
#define ZoneScoped
#endif

// Evaluates a HIP runtime call once; on failure prints the HIP error string
// with the source location to stderr and terminates the process.
#define HIP_CHECK(condition)                                                  \
    do                                                                        \
    {                                                                         \
        const hipError_t error = (condition);                                 \
        if (error != hipSuccess)                                              \
        {                                                                     \
            std::cerr << "An error occured: \"" << hipGetErrorString(error)   \
                      << "\t at " << __FILE__ << ":" << __LINE__ << std::endl; \
            std::exit(-1);                                                    \
        }                                                                     \
    } while (false)

// Integer division rounded up: the smallest q such that q * b >= a
// (for non-negative a and positive b).
template <typename T>
constexpr T ceildiv(const T& a, const T& b)
{
    return (a + b - 1) / b;
}

namespace
{

// Computes the next state of one cell; one thread handles one cell.
//
// `data` holds two consecutive width*height grids. The grid selected by
// `current` is read, the other one is written.
//
// For the cell's type `d`, metas[d] yields a [begin, end) range into
// `conversions`. The masks of all eight neighbours are AND-ed together; the
// lowest set bit i with begin + i < end selects conversions[begin + i]. When
// no bit in that range is set, conversions[begin] is written.
//
// conversions_size, metas_size and masks_size are accepted but not used for
// bounds checking.
__global__ void tick_kernel(sand::type::id_ty* data, unsigned int width,
                            unsigned int height,
                            sand::type::id_ty* conversions,
                            unsigned int conversions_size,
                            sand::rule::metadata* metas,
                            unsigned int metas_size, sand::rule::mask* masks,
                            unsigned masks_size, bool current,
                            uint16_t types_size)
{
    const unsigned int tile_x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int tile_y = blockIdx.y * blockDim.y + threadIdx.y;

    // The launch grid is rounded up to whole blocks; surplus threads exit.
    if (tile_x >= width || tile_y >= height)
    {
        return;
    }

    const unsigned int cell_count = width * height;
    const unsigned int read_base = current ? cell_count : 0U;
    const unsigned int write_base = current ? 0U : cell_count;
    const unsigned int cell = tile_x + tile_y * width;

    auto d = data[cell + read_base];
    auto [begin, end] = metas[d];

    sand::rule::mask mask = -1U;
    const unsigned int current_type_mask_lookup = d * types_size * 8;

    int neighbor_index = 0;
    for (int dy = -1; dy <= 1; dy++)
    {
        for (int dx = -1; dx <= 1; dx++)
        {
            if (dx == 0 && dy == 0)
            {
                continue;
            }

            // Unsigned arithmetic: stepping left/up from coordinate 0 wraps
            // to a huge value, so a single `< width` / `< height` test
            // rejects both ends. Coordinate 0 itself is a valid neighbour.
            const unsigned int nx = tile_x + dx;
            const unsigned int ny = tile_y + dy;

            // Neighbours outside the grid are looked up as type 0.
            auto neighbor_type = 0;
            if (nx < width && ny < height)
            {
                neighbor_type = data[nx + ny * width + read_base];
            }

            const auto found_mask =
                masks[neighbor_index + neighbor_type * 8 +
                      current_type_mask_lookup];
            mask &= found_mask;
            neighbor_index += 1;
        }
    }

    // Default to the first conversion of the range; override it with the
    // conversion of the lowest surviving mask bit, if any.
    auto result = conversions[begin];
    for (int bit = begin; bit < end; bit++)
    {
        if (mask & 1)
        {
            result = conversions[bit];
            break;
        }
        mask >>= 1;
    }
    data[cell + write_base] = result;
}

} // namespace

// Releases the four device buffers allocated by initialize_device_state().
sand::sand::~sand()
{
    HIP_CHECK(hipFree(d_conversions));
    HIP_CHECK(hipFree(d_metas));
    HIP_CHECK(hipFree(d_masks));
    HIP_CHECK(hipFree(d_data));
}

// Returns the type stored at (x, y) in the current grid, or type::OFF_GRID
// when the coordinate lies outside the grid. Pending device results are
// copied back to the host first.
sand::type sand::sand::get(int x, int y)
{
    // Checked first so that off-grid queries never trigger a device copy.
    if (x < 0 || x >= width || y < 0 || y >= height)
    {
        return type::OFF_GRID;
    }

    sync_device_read_data();
    return data[x + y * width + current * width * height];
}

// Stores `type` at (x, y) in the current grid and marks the host buffer as
// needing upload. Coordinates outside the grid are ignored.
void sand::sand::set(int x, int y, type type)
{
    if (x < 0 || x >= width || y < 0 || y >= height)
    {
        return;
    }

    // The upload in sync_device_write_data() copies the whole host buffer.
    // Pull pending device results first, otherwise that upload would replace
    // them with stale host data.
    sync_device_read_data();

    data[x + y * width + current * width * height] = type;
    write_dirty = true;
}

// Advances the simulation by one step: uploads pending host edits, runs
// tick_kernel over the whole grid and swaps the current grid.
void sand::sand::tick()
{
    ZoneScoped;
    sync_device_write_data();

    constexpr unsigned int block_size_x = 32;
    constexpr unsigned int block_size_y = 32;
    const unsigned int grid_size_x = ceildiv<unsigned int>(width, block_size_x);
    // Rows are covered by the y dimension, so this is derived from height.
    const unsigned int grid_size_y =
        ceildiv<unsigned int>(height, block_size_y);

    // NOTE(review): the launch configuration was missing from the reviewed
    // source; a 2D grid of 32x32 blocks is reconstructed from the locals
    // above. Confirm no shared-memory size or stream argument was intended.
    tick_kernel<<<dim3(grid_size_x, grid_size_y),
                  dim3(block_size_x, block_size_y)>>>(
        d_data, width, height, d_conversions, conversions.size(), d_metas,
        metas.size(), d_masks, masks.size(), current, types.size());
    HIP_CHECK(hipGetLastError());

    read_dirty = true;
    current = !current;
}

// Builds a width x height simulation whose two grids are filled with
// `initial`, then allocates and uploads all device-side state.
//
// NOTE(review): the vector element types were missing from the reviewed
// source; they are inferred from the sizeof() expressions in
// initialize_device_state(). Confirm against the declaration in sand/sand.h.
sand::sand::sand(type::range types,
                 const std::vector<type::id_ty>& conversions,
                 const std::vector<rule::metadata>& metas,
                 const std::vector<rule::mask>& masks, int width, int height,
                 type initial)
    : width(width), height(height), types(types), conversions(conversions),
      metas(metas), masks(masks), data(width * height * 2, initial),
      current(false), write_dirty(true), read_dirty(false),
      d_conversions(nullptr), d_metas(nullptr), d_masks(nullptr),
      d_data(nullptr)
{
    initialize_device_state();
}

// Allocates the device buffers for conversions, metas, masks and the grid
// data, and copies the host contents of each into them.
void sand::sand::initialize_device_state()
{
    HIP_CHECK(
        hipMalloc(&d_conversions, conversions.size() * sizeof(type::id_ty)));
    HIP_CHECK(hipMalloc(&d_metas, metas.size() * sizeof(rule::metadata)));
    HIP_CHECK(hipMalloc(&d_masks, masks.size() * sizeof(rule::mask)));
    HIP_CHECK(hipMalloc(&d_data, data.size() * sizeof(type::id_ty)));

    HIP_CHECK(hipMemcpy(d_conversions, conversions.data(),
                        conversions.size() * sizeof(type::id_ty),
                        hipMemcpyHostToDevice));
    HIP_CHECK(hipMemcpy(d_metas, metas.data(),
                        metas.size() * sizeof(rule::metadata),
                        hipMemcpyHostToDevice));
    HIP_CHECK(hipMemcpy(d_masks, masks.data(),
                        masks.size() * sizeof(rule::mask),
                        hipMemcpyHostToDevice));
    HIP_CHECK(hipMemcpy(d_data, data.data(),
                        data.size() * sizeof(type::id_ty),
                        hipMemcpyHostToDevice));
}

// Uploads the host grid to the device if set() modified it since the last
// upload.
void sand::sand::sync_device_write_data()
{
    ZoneScoped;
    if (write_dirty)
    {
        HIP_CHECK(hipMemcpy(d_data, data.data(),
                            data.size() * sizeof(type::id_ty),
                            hipMemcpyHostToDevice));
        write_dirty = false;
    }
}

// Downloads the device grid to the host if tick() ran since the last
// download.
void sand::sand::sync_device_read_data()
{
    ZoneScoped;
    if (read_dirty)
    {
        HIP_CHECK(hipMemcpy(data.data(), d_data,
                            data.size() * sizeof(type::id_ty),
                            hipMemcpyDeviceToHost));
        read_dirty = false;
    }
}