diff --git a/examples/basic.cpp b/examples/basic.cpp index 7c49342..1162f41 100644 --- a/examples/basic.cpp +++ b/examples/basic.cpp @@ -54,32 +54,32 @@ void init(sprstk* instance, void* userdata) for (int i = 0; i < 24; i++) { uint8_t val = 0x33 / (12 - i / 2.0f) + 0x33; - pal.colors[i] = color(val, val, val, 0x7F); + pal.colors[i] = color(val, val, val, 0x60); } for (int i = 24; i < 32; i++) { uint8_t val = 0x55 / (16 - i / 2.0f) + 0xAA; - pal.colors[i] = color(val, val, val, 0x7F); + pal.colors[i] = color(val, val, val, 0x60); } sprstk_set_palette(instance, 0, &pal); for (int i = 0; i < 16; i++) { - pal.colors[i] = color(0x70 / (5.0f - i / 4.0f), 0x35 / (5.0f - i / 4.0f), 0, 0x7F); + pal.colors[i] = color(0x70 / (5.0f - i / 4.0f), 0x35 / (5.0f - i / 4.0f), 0, 0x60); } for (int i = 16; i < 32; i++) { - pal.colors[i] = color(0x05 / (8.5f - i / 2.5f) + 0x15, 0x40 / (8.5f - i / 2.5f) + 0x0, 0, 0x7F); + pal.colors[i] = color(0x05 / (8.5f - i / 2.5f) + 0x15, 0x40 / (8.5f - i / 2.5f) + 0x0, 0, 0x60); } sprstk_set_palette(instance, 1, &pal); for (int i = 0; i < 8; i++) { - pal.colors[i] = color(0x20 / (8 - i) + 0x20, 0x40 / (8 - i) + 0x40, 0x90 / (8 - i) + 0x40, 0x7F); + pal.colors[i] = color(0x20 / (8 - i) + 0x10, 0x40 / (8 - i) + 0x20, 0xB0 / (8 - i) + 0x40, 0x60); } sprstk_set_palette(instance, 2, &pal); diff --git a/src/sprstk.cpp b/src/sprstk.cpp index 301f1d8..136690e 100644 --- a/src/sprstk.cpp +++ b/src/sprstk.cpp @@ -7,11 +7,11 @@ #include -#define OIT_LAYERS 1 +#define OIT_LAYERS 32 #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) -const char* MESH_SHADER_CODE = R"( +const char* AZ_PASS_MESH_SHADER_CODE = R"( #version 460 #extension GL_NV_mesh_shader : require @@ -22,7 +22,7 @@ layout (triangles, max_vertices = 128, max_primitives = 64) out; layout (location = 0) out PerVertexData { flat uint layer; - vec4 color; + flat uint color; } v_out[]; layout (location = 1) uniform vec3 screen_size_and_pixel_scale; @@ -74,23 +74,18 @@ void main() uint palette_lookup = bitfieldExtract(t_info.position, 27, 5); ColorInfo c_info = color_infos[palette_lookup]; uint c = c_info.color[gl_LocalInvocationID.x]; - float a = bitfieldExtract(c, 0, 8); - float b = bitfieldExtract(c, 8, 8); - float g = bitfieldExtract(c, 16, 8); - float r = bitfieldExtract(c, 24, 8); - vec4 color = vec4(r, g, b, a) / vec4(256); for (uint i = 4 * gl_LocalInvocationID.x; i < 4 * gl_LocalInvocationID.x + 4; i++) { vec4 position = vec4(rotation_matrix * positions[i % 4], float(4 * gl_LocalInvocationID.x + z_offset) / 128, 1); position.xy /= screen_size_and_pixel_scale.xy; position.xy *= scale; - position.y += 16 * gl_LocalInvocationID.x * scale / screen_size_and_pixel_scale.y; + position.y += 20 * gl_LocalInvocationID.x * scale / screen_size_and_pixel_scale.y; gl_MeshVerticesNV[i].gl_Position = position; v_out[i].layer = 4 * gl_LocalInvocationID.x + z_offset; - v_out[i].color = color; + v_out[i].color = c; } for (uint i = 6 * gl_LocalInvocationID.x; i < 6 * gl_LocalInvocationID.x + 6; i++) @@ -102,78 +97,86 @@ void main() } )"; -const char* FRAGMENT_SHADER_CODE = R"( +const char* AZ_PASS_FRAGMENT_SHADER_CODE = R"( #version 460 -#extension GL_NV_fragment_shader_interlock : enable -#extension GL_ARB_fragment_shader_interlock : enable - -#if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock - -layout (pixel_interlock_unordered) in; - -#if GL_NV_fragment_shader_interlock -#define beginInvocationInterlock beginInvocationInterlockNV -#define endInvocationInterlock endInvocationInterlockNV -#else // GL_NV_fragment_shader_interlock -#define beginInvocationInterlock beginInvocationInterlockARB -#define endInvocationInterlock endInvocationInterlockARB -#endif // GL_NV_fragment_shader_interlock - -#else // #if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock - -#pragma error "fragment shader interlock is required" - -#endif // #if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock - layout (location = 0) out vec4 FragColor; -layout (binding = 0, rgba8) uniform restrict coherent image3D ABuffer; -layout (binding = 1, r8ui) uniform restrict coherent uimage3D ZBuffer; +layout (binding = 0, rg32ui) uniform restrict writeonly uimage3D AZBuffer; +layout (binding = 1, r32ui) uniform restrict uimage2D AZNextBuffer; in PerVertexData { flat uint layer; - vec4 color; + flat uint color; } fragIn; void main() { - vec4 color = fragIn.color; - uint z_current = fragIn.layer; + const uint position = imageAtomicAdd(AZNextBuffer, ivec2(gl_FragCoord.xy), 1); - beginInvocationInterlock(); - for (int i = 0; i < )" STRINGIFY(OIT_LAYERS) R"(; i++) - { - const ivec3 image_coord = ivec3(gl_FragCoord.xy, i); - - const uint z_from_buffer = imageLoad(ZBuffer, image_coord).x; - if (z_current < z_from_buffer) - { - imageStore(ZBuffer, image_coord, uvec4(z_current, 0, 0, 0)); - const vec4 temp_color = imageLoad(ABuffer, image_coord); - imageStore(ABuffer, image_coord, color); - - z_current = z_from_buffer; - color = temp_color; - - if (z_current == 0xFF) { break; } - } - } - endInvocationInterlock(); - - memoryBarrierImage(); - - color = vec4(0); - for (int i = 0; i < )" STRINGIFY(OIT_LAYERS) R"(; i++) + if (position < )" STRINGIFY(OIT_LAYERS) R"() { - const ivec3 image_coord = ivec3(gl_FragCoord.xy, i); - - const vec4 temp_color = imageLoad(ABuffer, image_coord); - color = vec4(temp_color.rgb * temp_color.a + color.rgb * (1 - temp_color.a), temp_color.a); + imageStore(AZBuffer, ivec3(gl_FragCoord.xy, position), uvec4(fragIn.layer, fragIn.color, 0, 0)); } - FragColor = color; + FragColor = vec4(0); +} +)"; + +const char* COMPOSITE_PASS_VERTEX_SHADER = R"( +#version 460 + +vec2 positions[3] = { vec2(-2.1, -1.1), vec2(0, 3.1), vec2(2.1, -1.1) }; + +void main() +{ + gl_Position = vec4(positions[gl_VertexID], 0, 1); +} +)"; + +const char* COMPOSITE_PASS_FRAGMENT_SHADER = R"( +#version 460 + +layout (location = 0) out vec4 FragColor; + +layout (binding = 0, rg32ui) uniform restrict readonly uimage3D AZBuffer; +layout (binding = 1, r32ui) uniform restrict readonly uimage2D AZNextBuffer; + +void main() +{ + uvec2 data[ )" STRINGIFY(OIT_LAYERS) R"(]; + + const uint layer_count = imageLoad(AZNextBuffer, ivec2(gl_FragCoord.xy)).x; + + for (uint i = 0; i < layer_count; i++) + { + data[i] = imageLoad(AZBuffer, ivec3(gl_FragCoord.xy, i)).xy; + } + + for (int i = 0; i < layer_count; i++) + { + for (int j = i; j > 0 && data[j - 1].x > data[j].x; j--) + { + const uvec2 temp = data[j]; + data[j] = data[j - 1]; + data[j - 1] = temp; + } + } + + vec3 color = vec3(0); + for (int i = 0; i < layer_count; i++) + { + uint a = bitfieldExtract(data[i].y, 0, 8); + uint b = bitfieldExtract(data[i].y, 8, 8); + uint g = bitfieldExtract(data[i].y, 16, 8); + uint r = bitfieldExtract(data[i].y, 24, 8); + vec4 temp_color = vec4(r, g, b, a) / vec4(255); + + color = vec3(temp_color.rgb * temp_color.a + color.rgb * (1 - temp_color.a)); + } + + FragColor = vec4(color, 1); } )"; @@ -199,6 +202,9 @@ public: prev_resized_ticks(0), resized(false) { + gl.az_buffer = 0; + gl.az_next_buffer = 0; + if (!callbacks.update) { throw std::runtime_error("No update callback"); @@ -248,9 +254,9 @@ public: int width, height; SDL_GetWindowSizeInPixels(sdl.window, &width, &height); glViewport(0, 0, width, height); - glProgramUniform3f(gl.program, 1, width, height, 8); + glProgramUniform3f(gl.az_pass_program, 1, width, height, 8); - create_buffers(); + update_buffers(); } callbacks.update(this, dt, userdata); @@ -259,17 +265,27 @@ public: glClear(GL_COLOR_BUFFER_BIT); constexpr uint32_t color = 0; - glClearTexImage(gl.a_buffer, 0, GL_RGBA, GL_UNSIGNED_INT, &color); + glClearTexImage(gl.az_buffer, 0, GL_RG_INTEGER, GL_UNSIGNED_INT, &color); - constexpr uint8_t z = 0xFF; - glClearTexImage(gl.z_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z); + constexpr uint8_t z = 0; + glClearTexImage(gl.az_next_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z); - int i; - for (i = 0; i < gl.tile_count; i += 65535) + glUseProgram(gl.az_pass_program); + for (int i = 0; i < gl.tile_count; i += 65535) { - glDrawMeshTasksNV(i, 65535); + int count = 65535; + if (i + 65535 > gl.tile_count) + { + count = gl.tile_count - i; + } + glDrawMeshTasksNV(i, count); } + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + + glUseProgram(gl.composite_pass_program); + glDrawArrays(GL_TRIANGLES, 0, 3); + SDL_GL_SwapWindow(sdl.window); } @@ -316,7 +332,7 @@ public: void set_scale(float scale) { - glProgramUniform1f(gl.program, 2, scale); + glProgramUniform1f(gl.az_pass_program, 2, scale); } void set_angle(float angle) @@ -325,7 +341,7 @@ public: cosf(angle), -sinf(angle), sinf(angle), cosf(angle) }; - glProgramUniformMatrix2fv(gl.program, 3, 1, false, arr); + glProgramUniformMatrix2fv(gl.az_pass_program, 3, 1, false, arr); } private: @@ -344,14 +360,16 @@ private: } sdl; struct { - unsigned int program; + unsigned int vao; + unsigned int az_pass_program; + unsigned int composite_pass_program; unsigned int tile_buffer; TileInfo* tile_buffer_map; unsigned int tile_count; unsigned int color_buffer; sprstk_palette* color_info_map; - unsigned int a_buffer; - unsigned int z_buffer; + unsigned int az_buffer; + unsigned int az_next_buffer; } gl; void init_sdl() @@ -403,11 +421,14 @@ private: SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, "%s", message); }, nullptr); + glGenVertexArrays(1, &gl.vao); + glBindVertexArray(gl.vao); + int success; char info_log[512]; unsigned int mesh = glCreateShader(GL_MESH_SHADER_NV); - glShaderSource(mesh, 1, &MESH_SHADER_CODE, nullptr); + glShaderSource(mesh, 1, &AZ_PASS_MESH_SHADER_CODE, nullptr); glCompileShader(mesh); glGetShaderiv(mesh, GL_COMPILE_STATUS, &success); if (!success) @@ -416,41 +437,73 @@ private: SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Mesh shader: %s", info_log); } - unsigned int fragment = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(fragment, 1, &FRAGMENT_SHADER_CODE, nullptr); - glCompileShader(fragment); - glGetShaderiv(fragment, GL_COMPILE_STATUS, &success); + unsigned int az_fragment = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(az_fragment, 1, &AZ_PASS_FRAGMENT_SHADER_CODE, nullptr); + glCompileShader(az_fragment); + glGetShaderiv(az_fragment, GL_COMPILE_STATUS, &success); if (!success) { - glGetShaderInfoLog(fragment, sizeof(info_log), nullptr, info_log); - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Fragment shader: %s", info_log); + glGetShaderInfoLog(az_fragment, sizeof(info_log), nullptr, info_log); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "AZ Pass fragment shader: %s", info_log); } - gl.program = glCreateProgram(); - glAttachShader(gl.program, mesh); - glAttachShader(gl.program, fragment); - glLinkProgram(gl.program); - glGetProgramiv(gl.program, GL_LINK_STATUS, &success); + gl.az_pass_program = glCreateProgram(); + glAttachShader(gl.az_pass_program, mesh); + glAttachShader(gl.az_pass_program, az_fragment); + glLinkProgram(gl.az_pass_program); + glGetProgramiv(gl.az_pass_program, GL_LINK_STATUS, &success); if (!success) { - glGetProgramInfoLog(gl.program, sizeof(info_log), nullptr, info_log); - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Program: %s", info_log); + glGetProgramInfoLog(gl.az_pass_program, sizeof(info_log), nullptr, info_log); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "AZ pass program: %s", info_log); } glDeleteShader(mesh); - glDeleteShader(fragment); + glDeleteShader(az_fragment); + + unsigned int vertex = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vertex, 1, &COMPOSITE_PASS_VERTEX_SHADER, nullptr); + glCompileShader(vertex); + glGetShaderiv(vertex, GL_COMPILE_STATUS, &success); + if (!success) + { + glGetShaderInfoLog(vertex, sizeof(info_log), nullptr, info_log); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Vertex shader: %s", info_log); + } + + unsigned int composite_pass_fragment = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(composite_pass_fragment, 1, &COMPOSITE_PASS_FRAGMENT_SHADER, nullptr); + glCompileShader(composite_pass_fragment); + glGetShaderiv(composite_pass_fragment, GL_COMPILE_STATUS, &success); + if (!success) + { + glGetShaderInfoLog(composite_pass_fragment, sizeof(info_log), nullptr, info_log); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Composite pass fragment shader: %s", info_log); + } + + gl.composite_pass_program = glCreateProgram(); + glAttachShader(gl.composite_pass_program, vertex); + glAttachShader(gl.composite_pass_program, composite_pass_fragment); + glLinkProgram(gl.composite_pass_program); + glGetProgramiv(gl.composite_pass_program, GL_LINK_STATUS, &success); + if (!success) + { + glGetProgramInfoLog(gl.az_pass_program, sizeof(info_log), nullptr, info_log); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Composite pass program: %s", info_log); + } + + glDeleteShader(vertex); + glDeleteShader(composite_pass_fragment); if (!success) { throw application_error("Failed to compile and link shader program"); } - glUseProgram(gl.program); - int width, height; SDL_GetWindowSizeInPixels(sdl.window, &width, &height); glViewport(0, 0, width, height); - glProgramUniform3f(gl.program, 1, width, height, 8); + glProgramUniform3f(gl.az_pass_program, 1, width, height, 8); glGenBuffers(1, &gl.tile_buffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, gl.tile_buffer); @@ -466,48 +519,48 @@ private: gl.color_info_map = (sprstk_palette*)glMapNamedBufferRange(gl.color_buffer, 0, sizeof(sprstk_palette) * (1 << 5), GL_MAP_WRITE_BIT | GL_MAP_COHERENT_BIT); const float arr[4] = {1, 0, 0, 1}; - glProgramUniformMatrix2fv(gl.program, 3, 1, false, arr); - glProgramUniform1f(gl.program, 2, 1); + glProgramUniformMatrix2fv(gl.az_pass_program, 3, 1, false, arr); + glProgramUniform1f(gl.az_pass_program, 2, 1); - create_buffers(); + update_buffers(); } void destroy_gl() { - glDeleteTextures(1, &gl.a_buffer); - glDeleteTextures(1, &gl.z_buffer); - glDeleteProgram(gl.program); + glDeleteTextures(1, &gl.az_buffer); + glDeleteTextures(1, &gl.az_next_buffer); + glDeleteProgram(gl.az_pass_program); + glDeleteProgram(gl.composite_pass_program); + glDeleteBuffers(1, &gl.color_buffer); glDeleteBuffers(1, &gl.tile_buffer); SDL_GL_DestroyContext(sdl.context); } - void create_buffers() + void update_buffers() { - if (gl.a_buffer) + if (gl.az_buffer) { - glDeleteTextures(1, &gl.a_buffer); - glDeleteTextures(1, &gl.z_buffer); + glDeleteTextures(1, &gl.az_buffer); + glDeleteTextures(1, &gl.az_next_buffer); } int width, height; SDL_GetWindowSizeInPixels(sdl.window, &width, &height); - glGenTextures(1, &gl.a_buffer); - glBindTexture(GL_TEXTURE_3D, gl.a_buffer); - glTexStorage3D(GL_TEXTURE_3D, 1, GL_RGBA8, width, height, OIT_LAYERS); - glGenTextures(1, &gl.z_buffer); - glBindTexture(GL_TEXTURE_3D, gl.z_buffer); - glTexStorage3D(GL_TEXTURE_3D, 1, GL_R8UI, width, height, OIT_LAYERS); + glGenTextures(1, &gl.az_buffer); + glBindTexture(GL_TEXTURE_3D, gl.az_buffer); + glTexStorage3D(GL_TEXTURE_3D, 1, GL_RG32UI, width, height, OIT_LAYERS); + glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - constexpr uint32_t color = 0; - glClearTexImage(gl.a_buffer, 0, GL_RGBA, GL_UNSIGNED_INT, &color); - glBindImageTexture(0, gl.a_buffer, 0, true, 0, GL_READ_WRITE, GL_RGBA8); + glGenTextures(1, &gl.az_next_buffer); + glBindTexture(GL_TEXTURE_2D, gl.az_next_buffer); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, width, height); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - constexpr uint8_t z = 0xFF; - glClearTexImage(gl.z_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z); - glBindImageTexture(1, gl.z_buffer, 0, true, 0, GL_READ_WRITE, GL_R8UI); - - glBindTexture(GL_TEXTURE_3D, gl.a_buffer); + glBindImageTexture(0, gl.az_buffer, 0, true, 0, GL_READ_WRITE, GL_RG32UI); + glBindImageTexture(1, gl.az_next_buffer, 0, false, 0, GL_READ_WRITE, GL_R32UI); } };