diff --git a/.clangd b/.clangd index 9a8849b..ba28999 100644 --- a/.clangd +++ b/.clangd @@ -1,5 +1,10 @@ Documentation: CommentFormat: Doxygen +CompileFlags: + Remove: + - -forward-unknown-to-host-compiler* + - --generate-code=* + - -Xcompiler=* If: PathMatch: .*\.hip diff --git a/CMakeLists.txt b/CMakeLists.txt index bdbea5a..1d020ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,6 @@ add_library(sand STATIC src/type_range.cpp src/rule.cpp src/rule_builder.cpp - src/sand.cpp src/sand.hip ) @@ -40,10 +39,10 @@ if(EXISTS third/tracy/CMakeLists.txt) add_subdirectory(third/tracy) target_link_libraries(sand PUBLIC Tracy::TracyClient) if(NOT TRACY_ENABLE) - target_compile_definitions(sand PUBLIC FrameMark ZoneScoped) + target_compile_definitions(sand PUBLIC FrameMark= ZoneScoped=) endif() else() - target_compile_definitions(sand PUBLIC FrameMark ZoneScoped) + target_compile_definitions(sand PUBLIC FrameMark= ZoneScoped=) endif() find_package(SDL3 CONFIG REQUIRED) diff --git a/include/sand/sand.h b/include/sand/sand.h index 898ba98..5778ad2 100644 --- a/include/sand/sand.h +++ b/include/sand/sand.h @@ -15,7 +15,14 @@ namespace sand class sand { public: - type get(int x, int y) const; + sand(const sand&) = delete; + sand(sand&&) = default; + + sand& operator=(const sand&) = delete; + + ~sand(); + + type get(int x, int y); void set(int x, int y, type type); void tick(); @@ -26,14 +33,25 @@ private: const std::vector& masks, int width, int height, type initial); + void initialize_device_state(); + void sync_device_write_data(); + void sync_device_read_data(); + + int width; + int height; type::range types; std::vector conversions; std::vector metas; std::vector masks; - int width; - int height; - std::vector data; + std::vector data; bool current; + bool write_dirty; + bool read_dirty; + + type::id_ty* d_conversions; + rule::metadata* d_metas; + rule::mask* d_masks; + type::id_ty* d_data; friend class rule::builder; }; diff --git a/include/sand/type.h b/include/sand/type.h index 7a1f3b9..f42e14a 100644 --- a/include/sand/type.h +++ b/include/sand/type.h @@ -13,6 +13,7 @@ class type public: class builder; class range; + using id_ty = uint16_t; bool operator==(const type&) const; @@ -21,8 +22,6 @@ public: static constexpr unsigned int MAX_TYPES = UINT16_MAX; private: - using id_ty = uint16_t; - type(id_ty id); operator id_ty() const; diff --git a/src/main.cpp b/src/main.cpp index 4cc3443..f38228a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,8 +11,10 @@ #include #endif // TRACY_ENABLE -constexpr uint16_t WIDTH = 64; -constexpr uint16_t HEIGHT = 64; +constexpr uint16_t WIDTH = 256; +constexpr uint16_t HEIGHT = 256; +constexpr float UPDATE_TIME = 0; +constexpr int UPDATE_TICKS = 1; constexpr int WINDOW_WIDTH = 1024; constexpr int WINDOW_HEIGHT = 1024; @@ -135,11 +137,14 @@ int main(int argc, char** argv) float x, y; int state; - if (time > 0) - { - time = 0; - s.tick(); + if (time > UPDATE_TIME) + { + for (int i = 0; i < UPDATE_TICKS; i++) + { + s.tick(); + } + time = 0; int count = 0; for (int i = 0; i < WIDTH; i++) diff --git a/src/sand.cpp b/src/sand.cpp deleted file mode 100644 index 2b2aec2..0000000 --- a/src/sand.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "sand/sand.h" - -#include "sand/type.h" - -#ifdef TRACY_ENABLE -#include -#endif // TRACY_ENABLE - -sand::type sand::sand::get(int x, int y) const -{ - if (x < 0 || x >= width || y < 0 || y >= height) - { - return type::OFF_GRID; - } - return data[x + y * width + current * width * height]; -} - -void sand::sand::set(int x, int y, type type) -{ - data[x + y * width + current * width * height] = type; -} - -void sand::sand::tick() -{ - ZoneScoped; -#pragma omp parallel for - for (int tile_index = 0; tile_index < width * height; tile_index++) - { - const int x = tile_index % width; - const int y = tile_index / width; - - auto [begin, end] = metas[get(x, y)]; - uint32_t mask = -1U; - - int neighbor_index = 0; - for (int dy = -1; dy <= 1; dy++) - { - for (int dx = -1; dx <= 1; dx++) - { - if (dx == 0 && dy == 0) - { - continue; - } - - mask &= masks[neighbor_index + get(x + dx, y + dy) * 8 - + get(x, y) * types.size() * 8]; - - neighbor_index += 1; - } - } - - bool found = false; - for (int bit = begin; bit < end; bit++) - { - if (mask & 1) - { - data[x + y * width + !current * width * height] = conversions[bit]; - found = true; - break; - } - mask >>= 1; - } - if (!found) - { - data[x + y * width + !current * width * height] = conversions[begin]; - } - } - - current = !current; -} - -sand::sand::sand(type::range types, const std::vector& conversions, - const std::vector& metas, - const std::vector& masks, int width, int height, - type initial) : - types(types), - conversions(conversions), - metas(metas), - masks(masks), - width(width), - height(height), - data(width * height * 2, initial), - current(false) -{ -} diff --git a/src/sand.hip b/src/sand.hip index 5c5af53..1dbce5a 100644 --- a/src/sand.hip +++ b/src/sand.hip @@ -1,9 +1,14 @@ -#include +#include "sand/sand.h" +#include "sand/type.h" + #include + +#ifdef TRACY_ENABLE +#include +#endif // TRACY_ENABLE + #include #include -#include -#include #define HIP_CHECK(condition) \ do \ @@ -21,3 +26,186 @@ template constexpr T ceildiv(const T& a, const T& b) { return (a + b - 1) / b; } + +namespace +{ + +__global__ void +tick_kernel(sand::type::id_ty* data, unsigned int width, unsigned int height, + sand::type::id_ty* conversions, unsigned int conversions_size, + sand::rule::metadata* metas, unsigned int metas_size, + sand::rule::mask* masks, unsigned masks_size, bool current, + uint16_t types_size) +{ + const unsigned int tile_x = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned int tile_y = blockIdx.y * blockDim.y + threadIdx.y; + + if (tile_x >= width || tile_y >= height) + { + return; + } + + auto d = data[tile_x + tile_y * width + current * width * height]; + auto [begin, end] = metas[d]; + sand::rule::mask mask = -1U; + + const unsigned int current_type_mask_lookup = d * types_size * 8; + + int neighbor_index = 0; + for (int dy = -1; dy <= 1; dy++) + { + for (int dx = -1; dx <= 1; dx++) + { + if (dx == 0 && dy == 0) + { + continue; + } + + auto neighbor_type = 0; + if (tile_x + dx > 0 && tile_x + dx < width && tile_y + dy > 0 + && tile_y + dy < height) + { + neighbor_type = data[(tile_x + dx) + (tile_y + dy) * width + + current * width * height]; + } + + const auto found_mask + = masks[neighbor_index + neighbor_type * 8 + current_type_mask_lookup]; + mask &= found_mask; + + neighbor_index += 1; + } + } + + bool found = false; + for (int bit = begin; bit < end; bit++) + { + if (mask & 1) + { + data[tile_x + tile_y * width + !current * width * height] + = conversions[bit]; + found = true; + break; + } + mask >>= 1; + } + if (!found) + { + data[tile_x + tile_y * width + !current * width * height] + = conversions[begin]; + } +} + +} + +sand::sand::~sand() +{ + HIP_CHECK(hipFree(d_conversions)); + HIP_CHECK(hipFree(d_metas)); + HIP_CHECK(hipFree(d_masks)); + HIP_CHECK(hipFree(d_data)); +} + +sand::type sand::sand::get(int x, int y) +{ + sync_device_read_data(); + if (x < 0 || x >= width || y < 0 || y >= height) + { + return type::OFF_GRID; + } + return data[x + y * width + current * width * height]; +} + +void sand::sand::set(int x, int y, type type) +{ + data[x + y * width + current * width * height] = type; + write_dirty = true; +} + +void sand::sand::tick() +{ + ZoneScoped; + + sync_device_write_data(); + + constexpr int block_size_x = 32; + constexpr int block_size_y = 32; + const unsigned int grid_size_x = ceildiv(width, block_size_x); + const unsigned int grid_size_y = ceildiv(width, block_size_y); + + tick_kernel<<>>( + d_data, width, height, d_conversions, conversions.size(), d_metas, + metas.size(), d_masks, masks.size(), current, types.size()); + + HIP_CHECK(hipGetLastError()); + + read_dirty = true; + current = !current; +} + +sand::sand::sand(type::range types, const std::vector& conversions, + const std::vector& metas, + const std::vector& masks, int width, int height, + type initial) : + width(width), + height(height), + types(types), + conversions(conversions), + metas(metas), + masks(masks), + data(width * height * 2, initial), + current(false), + write_dirty(true), + read_dirty(false), + d_conversions(nullptr), + d_metas(nullptr), + d_masks(nullptr), + d_data(nullptr) +{ + initialize_device_state(); +} + +void sand::sand::initialize_device_state() +{ + HIP_CHECK( + hipMalloc(&d_conversions, conversions.size() * sizeof(type::id_ty))); + HIP_CHECK(hipMalloc(&d_metas, metas.size() * sizeof(rule::metadata))); + HIP_CHECK(hipMalloc(&d_masks, masks.size() * sizeof(rule::mask))); + HIP_CHECK(hipMalloc(&d_data, data.size() * sizeof(type::id_ty))); + + HIP_CHECK(hipMemcpy(d_conversions, conversions.data(), + conversions.size() * sizeof(type::id_ty), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_metas, metas.data(), + metas.size() * sizeof(rule::metadata), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_masks, masks.data(), masks.size() * sizeof(rule::mask), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty), + hipMemcpyHostToDevice)); +} + +void sand::sand::sync_device_write_data() +{ + ZoneScoped; + + if (write_dirty) + { + HIP_CHECK(hipMemcpy(d_data, data.data(), data.size() * sizeof(type::id_ty), + hipMemcpyHostToDevice)); + write_dirty = false; + } +} + +void sand::sand::sync_device_read_data() +{ + ZoneScoped; + + if (read_dirty) + { + HIP_CHECK(hipMemcpy(data.data(), d_data, data.size() * sizeof(type::id_ty), + hipMemcpyDeviceToHost)); + read_dirty = false; + } +}