Move tick to GPU
This commit is contained in:
parent
24246c86a1
commit
af3d33b1f9
5
.clangd
5
.clangd
@ -1,5 +1,10 @@
|
||||
Documentation:
|
||||
CommentFormat: Doxygen
|
||||
CompileFlags:
|
||||
Remove:
|
||||
- -forward-unknown-to-host-compiler*
|
||||
- --generate-code=*
|
||||
- -Xcompiler=*
|
||||
|
||||
If:
|
||||
PathMatch: .*\.hip
|
||||
|
||||
@ -20,7 +20,6 @@ add_library(sand STATIC
|
||||
src/type_range.cpp
|
||||
src/rule.cpp
|
||||
src/rule_builder.cpp
|
||||
src/sand.cpp
|
||||
|
||||
src/sand.hip
|
||||
)
|
||||
@ -40,10 +39,10 @@ if(EXISTS third/tracy/CMakeLists.txt)
|
||||
add_subdirectory(third/tracy)
|
||||
target_link_libraries(sand PUBLIC Tracy::TracyClient)
|
||||
if(NOT TRACY_ENABLE)
|
||||
target_compile_definitions(sand PUBLIC FrameMark ZoneScoped)
|
||||
target_compile_definitions(sand PUBLIC FrameMark= ZoneScoped=)
|
||||
endif()
|
||||
else()
|
||||
target_compile_definitions(sand PUBLIC FrameMark ZoneScoped)
|
||||
target_compile_definitions(sand PUBLIC FrameMark= ZoneScoped=)
|
||||
endif()
|
||||
|
||||
find_package(SDL3 CONFIG REQUIRED)
|
||||
|
||||
@ -15,7 +15,14 @@ namespace sand
|
||||
class sand
|
||||
{
|
||||
public:
|
||||
type get(int x, int y) const;
|
||||
sand(const sand&) = delete;
|
||||
sand(sand&&) = default;
|
||||
|
||||
sand& operator=(const sand&) = delete;
|
||||
|
||||
~sand();
|
||||
|
||||
type get(int x, int y);
|
||||
void set(int x, int y, type type);
|
||||
|
||||
void tick();
|
||||
@ -26,14 +33,25 @@ private:
|
||||
const std::vector<rule::mask>& masks, int width, int height,
|
||||
type initial);
|
||||
|
||||
void initialize_device_state();
|
||||
void sync_device_write_data();
|
||||
void sync_device_read_data();
|
||||
|
||||
int width;
|
||||
int height;
|
||||
type::range types;
|
||||
std::vector<type> conversions;
|
||||
std::vector<rule::metadata> metas;
|
||||
std::vector<rule::mask> masks;
|
||||
int width;
|
||||
int height;
|
||||
std::vector<type> data;
|
||||
std::vector<type::id_ty> data;
|
||||
bool current;
|
||||
bool write_dirty;
|
||||
bool read_dirty;
|
||||
|
||||
type::id_ty* d_conversions;
|
||||
rule::metadata* d_metas;
|
||||
rule::mask* d_masks;
|
||||
type::id_ty* d_data;
|
||||
|
||||
friend class rule::builder;
|
||||
};
|
||||
|
||||
@ -13,6 +13,7 @@ class type
|
||||
public:
|
||||
class builder;
|
||||
class range;
|
||||
using id_ty = uint16_t;
|
||||
|
||||
bool operator==(const type&) const;
|
||||
|
||||
@ -21,8 +22,6 @@ public:
|
||||
static constexpr unsigned int MAX_TYPES = UINT16_MAX;
|
||||
|
||||
private:
|
||||
using id_ty = uint16_t;
|
||||
|
||||
type(id_ty id);
|
||||
operator id_ty() const;
|
||||
|
||||
|
||||
15
src/main.cpp
15
src/main.cpp
@ -11,8 +11,10 @@
|
||||
#include <tracy/Tracy.hpp>
|
||||
#endif // TRACY_ENABLE
|
||||
|
||||
constexpr uint16_t WIDTH = 64;
|
||||
constexpr uint16_t HEIGHT = 64;
|
||||
constexpr uint16_t WIDTH = 256;
|
||||
constexpr uint16_t HEIGHT = 256;
|
||||
constexpr float UPDATE_TIME = 0;
|
||||
constexpr int UPDATE_TICKS = 1;
|
||||
|
||||
constexpr int WINDOW_WIDTH = 1024;
|
||||
constexpr int WINDOW_HEIGHT = 1024;
|
||||
@ -135,11 +137,14 @@ int main(int argc, char** argv)
|
||||
|
||||
float x, y;
|
||||
int state;
|
||||
if (time > 0)
|
||||
{
|
||||
time = 0;
|
||||
|
||||
if (time > UPDATE_TIME)
|
||||
{
|
||||
for (int i = 0; i < UPDATE_TICKS; i++)
|
||||
{
|
||||
s.tick();
|
||||
}
|
||||
time = 0;
|
||||
|
||||
int count = 0;
|
||||
for (int i = 0; i < WIDTH; i++)
|
||||
|
||||
85
src/sand.cpp
85
src/sand.cpp
@ -1,85 +0,0 @@
|
||||
#include "sand/sand.h"
|
||||
|
||||
#include "sand/type.h"
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
#include <tracy/Tracy.hpp>
|
||||
#endif // TRACY_ENABLE
|
||||
|
||||
sand::type sand::sand::get(int x, int y) const
|
||||
{
|
||||
if (x < 0 || x >= width || y < 0 || y >= height)
|
||||
{
|
||||
return type::OFF_GRID;
|
||||
}
|
||||
return data[x + y * width + current * width * height];
|
||||
}
|
||||
|
||||
void sand::sand::set(int x, int y, type type)
|
||||
{
|
||||
data[x + y * width + current * width * height] = type;
|
||||
}
|
||||
|
||||
void sand::sand::tick()
|
||||
{
|
||||
ZoneScoped;
|
||||
#pragma omp parallel for
|
||||
for (int tile_index = 0; tile_index < width * height; tile_index++)
|
||||
{
|
||||
const int x = tile_index % width;
|
||||
const int y = tile_index / width;
|
||||
|
||||
auto [begin, end] = metas[get(x, y)];
|
||||
uint32_t mask = -1U;
|
||||
|
||||
int neighbor_index = 0;
|
||||
for (int dy = -1; dy <= 1; dy++)
|
||||
{
|
||||
for (int dx = -1; dx <= 1; dx++)
|
||||
{
|
||||
if (dx == 0 && dy == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
mask &= masks[neighbor_index + get(x + dx, y + dy) * 8
|
||||
+ get(x, y) * types.size() * 8];
|
||||
|
||||
neighbor_index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
for (int bit = begin; bit < end; bit++)
|
||||
{
|
||||
if (mask & 1)
|
||||
{
|
||||
data[x + y * width + !current * width * height] = conversions[bit];
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
mask >>= 1;
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
data[x + y * width + !current * width * height] = conversions[begin];
|
||||
}
|
||||
}
|
||||
|
||||
current = !current;
|
||||
}
|
||||
|
||||
sand::sand::sand(type::range types, const std::vector<type>& conversions,
|
||||
const std::vector<rule::metadata>& metas,
|
||||
const std::vector<rule::mask>& masks, int width, int height,
|
||||
type initial) :
|
||||
types(types),
|
||||
conversions(conversions),
|
||||
metas(metas),
|
||||
masks(masks),
|
||||
width(width),
|
||||
height(height),
|
||||
data(width * height * 2, initial),
|
||||
current(false)
|
||||
{
|
||||
}
|
||||
194
src/sand.hip
194
src/sand.hip
@ -1,9 +1,14 @@
|
||||
#include <hip/amd_detail/amd_hip_runtime.h>
|
||||
#include "sand/sand.h"
|
||||
#include "sand/type.h"
|
||||
|
||||
#include <hip/driver_types.h>
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
#include <tracy/Tracy.hpp>
|
||||
#endif // TRACY_ENABLE
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#define HIP_CHECK(condition) \
|
||||
do \
|
||||
@ -21,3 +26,186 @@ template <typename T> constexpr T ceildiv(const T& a, const T& b)
|
||||
{
|
||||
return (a + b - 1) / b;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
__global__ void
|
||||
tick_kernel(sand::type::id_ty* data, unsigned int width, unsigned int height,
|
||||
sand::type::id_ty* conversions, unsigned int conversions_size,
|
||||
sand::rule::metadata* metas, unsigned int metas_size,
|
||||
sand::rule::mask* masks, unsigned masks_size, bool current,
|
||||
uint16_t types_size)
|
||||
{
|
||||
const unsigned int tile_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const unsigned int tile_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (tile_x >= width || tile_y >= height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto d = data[tile_x + tile_y * width + current * width * height];
|
||||
auto [begin, end] = metas[d];
|
||||
sand::rule::mask mask = -1U;
|
||||
|
||||
const unsigned int current_type_mask_lookup = d * types_size * 8;
|
||||
|
||||
int neighbor_index = 0;
|
||||
for (int dy = -1; dy <= 1; dy++)
|
||||
{
|
||||
for (int dx = -1; dx <= 1; dx++)
|
||||
{
|
||||
if (dx == 0 && dy == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
auto neighbor_type = 0;
|
||||
if (tile_x + dx > 0 && tile_x + dx < width && tile_y + dy > 0
|
||||
&& tile_y + dy < height)
|
||||
{
|
||||
neighbor_type = data[(tile_x + dx) + (tile_y + dy) * width
|
||||
+ current * width * height];
|
||||
}
|
||||
|
||||
const auto found_mask
|
||||
= masks[neighbor_index + neighbor_type * 8 + current_type_mask_lookup];
|
||||
mask &= found_mask;
|
||||
|
||||
neighbor_index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
for (int bit = begin; bit < end; bit++)
|
||||
{
|
||||
if (mask & 1)
|
||||
{
|
||||
data[tile_x + tile_y * width + !current * width * height]
|
||||
= conversions[bit];
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
mask >>= 1;
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
data[tile_x + tile_y * width + !current * width * height]
|
||||
= conversions[begin];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sand::sand::~sand()
|
||||
{
|
||||
HIP_CHECK(hipFree(d_conversions));
|
||||
HIP_CHECK(hipFree(d_metas));
|
||||
HIP_CHECK(hipFree(d_masks));
|
||||
HIP_CHECK(hipFree(d_data));
|
||||
}
|
||||
|
||||
sand::type sand::sand::get(int x, int y)
|
||||
{
|
||||
sync_device_read_data();
|
||||
if (x < 0 || x >= width || y < 0 || y >= height)
|
||||
{
|
||||
return type::OFF_GRID;
|
||||
}
|
||||
return data[x + y * width + current * width * height];
|
||||
}
|
||||
|
||||
void sand::sand::set(int x, int y, type type)
|
||||
{
|
||||
data[x + y * width + current * width * height] = type;
|
||||
write_dirty = true;
|
||||
}
|
||||
|
||||
void sand::sand::tick()
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
sync_device_write_data();
|
||||
|
||||
constexpr int block_size_x = 32;
|
||||
constexpr int block_size_y = 32;
|
||||
const unsigned int grid_size_x = ceildiv(width, block_size_x);
|
||||
const unsigned int grid_size_y = ceildiv(width, block_size_y);
|
||||
|
||||
tick_kernel<<<dim3(grid_size_x, grid_size_y),
|
||||
dim3(block_size_x, block_size_y), 0, hipStreamDefault>>>(
|
||||
d_data, width, height, d_conversions, conversions.size(), d_metas,
|
||||
metas.size(), d_masks, masks.size(), current, types.size());
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
read_dirty = true;
|
||||
current = !current;
|
||||
}
|
||||
|
||||
sand::sand::sand(type::range types, const std::vector<type>& conversions,
|
||||
const std::vector<rule::metadata>& metas,
|
||||
const std::vector<rule::mask>& masks, int width, int height,
|
||||
type initial) :
|
||||
width(width),
|
||||
height(height),
|
||||
types(types),
|
||||
conversions(conversions),
|
||||
metas(metas),
|
||||
masks(masks),
|
||||
data(width * height * 2, initial),
|
||||
current(false),
|
||||
write_dirty(true),
|
||||
read_dirty(false),
|
||||
d_conversions(nullptr),
|
||||
d_metas(nullptr),
|
||||
d_masks(nullptr),
|
||||
d_data(nullptr)
|
||||
{
|
||||
initialize_device_state();
|
||||
}
|
||||
|
||||
void sand::sand::initialize_device_state()
|
||||
{
|
||||
HIP_CHECK(
|
||||
hipMalloc(&d_conversions, conversions.size() * sizeof(type::id_ty)));
|
||||
HIP_CHECK(hipMalloc(&d_metas, metas.size() * sizeof(rule::metadata)));
|
||||
HIP_CHECK(hipMalloc(&d_masks, masks.size() * sizeof(rule::mask)));
|
||||
HIP_CHECK(hipMalloc(&d_data, data.size() * sizeof(type::id_ty)));
|
||||
|
||||
HIP_CHECK(hipMemcpy(d_conversions, conversions.data(),
|
||||
conversions.size() * sizeof(type::id_ty),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(d_metas, metas.data(),
|
||||
metas.size() * sizeof(rule::metadata),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(d_masks, masks.data(), masks.size() * sizeof(rule::mask),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
|
||||
hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
void sand::sand::sync_device_write_data()
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
if (write_dirty)
|
||||
{
|
||||
HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty),
|
||||
hipMemcpyHostToDevice));
|
||||
write_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void sand::sand::sync_device_read_data()
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
if (read_dirty)
|
||||
{
|
||||
HIP_CHECK(hipMemcpy(data.data(), d_data, data.size() * sizeof(type::id_ty),
|
||||
hipMemcpyDeviceToHost));
|
||||
read_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user