Finish implementing OIT

This commit is contained in:
shylie 2025-08-07 08:03:28 -04:00
parent 2af1acf9a9
commit 429db38cb2
2 changed files with 179 additions and 126 deletions

View File

@ -54,32 +54,32 @@ void init(sprstk* instance, void* userdata)
for (int i = 0; i < 24; i++)
{
uint8_t val = 0x33 / (12 - i / 2.0f) + 0x33;
pal.colors[i] = color(val, val, val, 0x7F);
pal.colors[i] = color(val, val, val, 0x60);
}
for (int i = 24; i < 32; i++)
{
uint8_t val = 0x55 / (16 - i / 2.0f) + 0xAA;
pal.colors[i] = color(val, val, val, 0x7F);
pal.colors[i] = color(val, val, val, 0x60);
}
sprstk_set_palette(instance, 0, &pal);
for (int i = 0; i < 16; i++)
{
pal.colors[i] = color(0x70 / (5.0f - i / 4.0f), 0x35 / (5.0f - i / 4.0f), 0, 0x7F);
pal.colors[i] = color(0x70 / (5.0f - i / 4.0f), 0x35 / (5.0f - i / 4.0f), 0, 0x60);
}
for (int i = 16; i < 32; i++)
{
pal.colors[i] = color(0x05 / (8.5f - i / 2.5f) + 0x15, 0x40 / (8.5f - i / 2.5f) + 0x0, 0, 0x7F);
pal.colors[i] = color(0x05 / (8.5f - i / 2.5f) + 0x15, 0x40 / (8.5f - i / 2.5f) + 0x0, 0, 0x60);
}
sprstk_set_palette(instance, 1, &pal);
for (int i = 0; i < 8; i++)
{
pal.colors[i] = color(0x20 / (8 - i) + 0x20, 0x40 / (8 - i) + 0x40, 0x90 / (8 - i) + 0x40, 0x7F);
pal.colors[i] = color(0x20 / (8 - i) + 0x10, 0x40 / (8 - i) + 0x20, 0xB0 / (8 - i) + 0x40, 0x60);
}
sprstk_set_palette(instance, 2, &pal);

View File

@ -7,11 +7,11 @@
#include <cmath>
#define OIT_LAYERS 1
#define OIT_LAYERS 32
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
const char* MESH_SHADER_CODE = R"(
const char* AZ_PASS_MESH_SHADER_CODE = R"(
#version 460
#extension GL_NV_mesh_shader : require
@ -22,7 +22,7 @@ layout (triangles, max_vertices = 128, max_primitives = 64) out;
layout (location = 0) out PerVertexData
{
flat uint layer;
vec4 color;
flat uint color;
} v_out[];
layout (location = 1) uniform vec3 screen_size_and_pixel_scale;
@ -74,23 +74,18 @@ void main()
uint palette_lookup = bitfieldExtract(t_info.position, 27, 5);
ColorInfo c_info = color_infos[palette_lookup];
uint c = c_info.color[gl_LocalInvocationID.x];
float a = bitfieldExtract(c, 0, 8);
float b = bitfieldExtract(c, 8, 8);
float g = bitfieldExtract(c, 16, 8);
float r = bitfieldExtract(c, 24, 8);
vec4 color = vec4(r, g, b, a) / vec4(256);
for (uint i = 4 * gl_LocalInvocationID.x; i < 4 * gl_LocalInvocationID.x + 4; i++)
{
vec4 position = vec4(rotation_matrix * positions[i % 4], float(4 * gl_LocalInvocationID.x + z_offset) / 128, 1);
position.xy /= screen_size_and_pixel_scale.xy;
position.xy *= scale;
position.y += 16 * gl_LocalInvocationID.x * scale / screen_size_and_pixel_scale.y;
position.y += 20 * gl_LocalInvocationID.x * scale / screen_size_and_pixel_scale.y;
gl_MeshVerticesNV[i].gl_Position = position;
v_out[i].layer = 4 * gl_LocalInvocationID.x + z_offset;
v_out[i].color = color;
v_out[i].color = c;
}
for (uint i = 6 * gl_LocalInvocationID.x; i < 6 * gl_LocalInvocationID.x + 6; i++)
@ -102,78 +97,86 @@ void main()
}
)";
const char* FRAGMENT_SHADER_CODE = R"(
const char* AZ_PASS_FRAGMENT_SHADER_CODE = R"(
#version 460
#extension GL_NV_fragment_shader_interlock : enable
#extension GL_ARB_fragment_shader_interlock : enable
#if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock
layout (pixel_interlock_unordered) in;
#if GL_NV_fragment_shader_interlock
#define beginInvocationInterlock beginInvocationInterlockNV
#define endInvocationInterlock endInvocationInterlockNV
#else // GL_NV_fragment_shader_interlock
#define beginInvocationInterlock beginInvocationInterlockARB
#define endInvocationInterlock endInvocationInterlockARB
#endif // GL_NV_fragment_shader_interlock
#else // #if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock
#pragma error "fragment shader interlock is required"
#endif // #if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock
layout (location = 0) out vec4 FragColor;
layout (binding = 0, rgba8) uniform restrict coherent image3D ABuffer;
layout (binding = 1, r8ui) uniform restrict coherent uimage3D ZBuffer;
layout (binding = 0, rg32ui) uniform restrict writeonly uimage3D AZBuffer;
layout (binding = 1, r32ui) uniform restrict uimage2D AZNextBuffer;
in PerVertexData
{
flat uint layer;
vec4 color;
flat uint color;
} fragIn;
void main()
{
vec4 color = fragIn.color;
uint z_current = fragIn.layer;
const uint position = imageAtomicAdd(AZNextBuffer, ivec2(gl_FragCoord.xy), 1);
beginInvocationInterlock();
for (int i = 0; i < )" STRINGIFY(OIT_LAYERS) R"(; i++)
{
const ivec3 image_coord = ivec3(gl_FragCoord.xy, i);
const uint z_from_buffer = imageLoad(ZBuffer, image_coord).x;
if (z_current < z_from_buffer)
{
imageStore(ZBuffer, image_coord, uvec4(z_current, 0, 0, 0));
const vec4 temp_color = imageLoad(ABuffer, image_coord);
imageStore(ABuffer, image_coord, color);
z_current = z_from_buffer;
color = temp_color;
if (z_current == 0xFF) { break; }
}
}
endInvocationInterlock();
memoryBarrierImage();
color = vec4(0);
for (int i = 0; i < )" STRINGIFY(OIT_LAYERS) R"(; i++)
if (position < )" STRINGIFY(OIT_LAYERS) R"()
{
const ivec3 image_coord = ivec3(gl_FragCoord.xy, i);
const vec4 temp_color = imageLoad(ABuffer, image_coord);
color = vec4(temp_color.rgb * temp_color.a + color.rgb * (1 - temp_color.a), temp_color.a);
imageStore(AZBuffer, ivec3(gl_FragCoord.xy, position), uvec4(fragIn.layer, fragIn.color, 0, 0));
}
FragColor = color;
FragColor = vec4(0);
}
)";
// Vertex shader for the composite pass. It consumes no vertex attributes:
// each vertex selects one of three hard-coded clip-space positions by
// gl_VertexID. Together the three points form a single oversized triangle
// that encloses the whole [-1, 1] clip-space square, so drawing three
// vertices runs the composite fragment shader on every pixel.
const char* COMPOSITE_PASS_VERTEX_SHADER = R"(
#version 460
vec2 positions[3] = { vec2(-2.1, -1.1), vec2(0, 3.1), vec2(2.1, -1.1) };
void main()
{
gl_Position = vec4(positions[gl_VertexID], 0, 1);
}
)";
// Fragment shader for the composite pass. For each pixel it:
//   1. reads the fragment counter from AZNextBuffer,
//   2. loads that many (layer, packed color) pairs from AZBuffer,
//   3. insertion-sorts them by ascending layer, and
//   4. blends them in that order over black, writing an opaque result.
//
// The AZ pass increments the per-pixel counter for every fragment but only
// stores the first OIT_LAYERS of them, so the counter may exceed OIT_LAYERS.
// The count is clamped before use; without the clamp both `data[]` and the
// AZBuffer image would be indexed out of bounds on heavily overdrawn pixels.
const char* COMPOSITE_PASS_FRAGMENT_SHADER = R"(
#version 460
layout (location = 0) out vec4 FragColor;
layout (binding = 0, rg32ui) uniform restrict readonly uimage3D AZBuffer;
layout (binding = 1, r32ui) uniform restrict readonly uimage2D AZNextBuffer;
void main()
{
    uvec2 data[ )" STRINGIFY(OIT_LAYERS) R"(];
    const uint stored_count = imageLoad(AZNextBuffer, ivec2(gl_FragCoord.xy)).x;
    const uint layer_count = min(stored_count, uint()" STRINGIFY(OIT_LAYERS) R"());
    for (uint i = 0; i < layer_count; i++)
    {
        data[i] = imageLoad(AZBuffer, ivec3(gl_FragCoord.xy, i)).xy;
    }
    for (int i = 0; i < layer_count; i++)
    {
        for (int j = i; j > 0 && data[j - 1].x > data[j].x; j--)
        {
            const uvec2 temp = data[j];
            data[j] = data[j - 1];
            data[j - 1] = temp;
        }
    }
    vec3 color = vec3(0);
    for (int i = 0; i < layer_count; i++)
    {
        uint a = bitfieldExtract(data[i].y, 0, 8);
        uint b = bitfieldExtract(data[i].y, 8, 8);
        uint g = bitfieldExtract(data[i].y, 16, 8);
        uint r = bitfieldExtract(data[i].y, 24, 8);
        vec4 temp_color = vec4(r, g, b, a) / vec4(255);
        color = vec3(temp_color.rgb * temp_color.a + color.rgb * (1 - temp_color.a));
    }
    FragColor = vec4(color, 1);
}
)";
@ -199,6 +202,9 @@ public:
prev_resized_ticks(0),
resized(false)
{
gl.az_buffer = 0;
gl.az_next_buffer = 0;
if (!callbacks.update)
{
throw std::runtime_error("No update callback");
@ -248,9 +254,9 @@ public:
int width, height;
SDL_GetWindowSizeInPixels(sdl.window, &width, &height);
glViewport(0, 0, width, height);
glProgramUniform3f(gl.program, 1, width, height, 8);
glProgramUniform3f(gl.az_pass_program, 1, width, height, 8);
create_buffers();
update_buffers();
}
callbacks.update(this, dt, userdata);
@ -259,17 +265,27 @@ public:
glClear(GL_COLOR_BUFFER_BIT);
constexpr uint32_t color = 0;
glClearTexImage(gl.a_buffer, 0, GL_RGBA, GL_UNSIGNED_INT, &color);
// Each RG32UI texel is two 32-bit components, so a pointer to a single
// uint32_t would be over-read. A null data pointer makes GL zero-fill instead.
glClearTexImage(gl.az_buffer, 0, GL_RG_INTEGER, GL_UNSIGNED_INT, nullptr);
constexpr uint8_t z = 0xFF;
glClearTexImage(gl.z_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z);
constexpr uint8_t z = 0;
glClearTexImage(gl.az_next_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z);
int i;
for (i = 0; i < gl.tile_count; i += 65535)
glUseProgram(gl.az_pass_program);
for (int i = 0; i < gl.tile_count; i += 65535)
{
glDrawMeshTasksNV(i, 65535);
int count = 65535;
if (i + 65535 > gl.tile_count)
{
count = gl.tile_count - i;
}
glDrawMeshTasksNV(i, count);
}
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glUseProgram(gl.composite_pass_program);
glDrawArrays(GL_TRIANGLES, 0, 3);
SDL_GL_SwapWindow(sdl.window);
}
@ -316,7 +332,7 @@ public:
void set_scale(float scale)
{
glProgramUniform1f(gl.program, 2, scale);
glProgramUniform1f(gl.az_pass_program, 2, scale);
}
void set_angle(float angle)
@ -325,7 +341,7 @@ public:
cosf(angle), -sinf(angle),
sinf(angle), cosf(angle)
};
glProgramUniformMatrix2fv(gl.program, 3, 1, false, arr);
glProgramUniformMatrix2fv(gl.az_pass_program, 3, 1, false, arr);
}
private:
@ -344,14 +360,16 @@ private:
} sdl;
struct
{
unsigned int program;
unsigned int vao;
unsigned int az_pass_program;
unsigned int composite_pass_program;
unsigned int tile_buffer;
TileInfo* tile_buffer_map;
unsigned int tile_count;
unsigned int color_buffer;
sprstk_palette* color_info_map;
unsigned int a_buffer;
unsigned int z_buffer;
unsigned int az_buffer;
unsigned int az_next_buffer;
} gl;
void init_sdl()
@ -403,11 +421,14 @@ private:
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, "%s", message);
}, nullptr);
glGenVertexArrays(1, &gl.vao);
glBindVertexArray(gl.vao);
int success;
char info_log[512];
unsigned int mesh = glCreateShader(GL_MESH_SHADER_NV);
glShaderSource(mesh, 1, &MESH_SHADER_CODE, nullptr);
glShaderSource(mesh, 1, &AZ_PASS_MESH_SHADER_CODE, nullptr);
glCompileShader(mesh);
glGetShaderiv(mesh, GL_COMPILE_STATUS, &success);
if (!success)
@ -416,41 +437,73 @@ private:
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Mesh shader: %s", info_log);
}
unsigned int fragment = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment, 1, &FRAGMENT_SHADER_CODE, nullptr);
glCompileShader(fragment);
glGetShaderiv(fragment, GL_COMPILE_STATUS, &success);
unsigned int az_fragment = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(az_fragment, 1, &AZ_PASS_FRAGMENT_SHADER_CODE, nullptr);
glCompileShader(az_fragment);
glGetShaderiv(az_fragment, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(fragment, sizeof(info_log), nullptr, info_log);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Fragment shader: %s", info_log);
glGetShaderInfoLog(az_fragment, sizeof(info_log), nullptr, info_log);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "AZ Pass fragment shader: %s", info_log);
}
gl.program = glCreateProgram();
glAttachShader(gl.program, mesh);
glAttachShader(gl.program, fragment);
glLinkProgram(gl.program);
glGetProgramiv(gl.program, GL_LINK_STATUS, &success);
gl.az_pass_program = glCreateProgram();
glAttachShader(gl.az_pass_program, mesh);
glAttachShader(gl.az_pass_program, az_fragment);
glLinkProgram(gl.az_pass_program);
glGetProgramiv(gl.az_pass_program, GL_LINK_STATUS, &success);
if (!success)
{
glGetProgramInfoLog(gl.program, sizeof(info_log), nullptr, info_log);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Program: %s", info_log);
glGetProgramInfoLog(gl.az_pass_program, sizeof(info_log), nullptr, info_log);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "AZ pass program: %s", info_log);
}
glDeleteShader(mesh);
glDeleteShader(fragment);
glDeleteShader(az_fragment);
unsigned int vertex = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex, 1, &COMPOSITE_PASS_VERTEX_SHADER, nullptr);
glCompileShader(vertex);
glGetShaderiv(vertex, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(vertex, sizeof(info_log), nullptr, info_log);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Vertex shader: %s", info_log);
}
unsigned int composite_pass_fragment = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(composite_pass_fragment, 1, &COMPOSITE_PASS_FRAGMENT_SHADER, nullptr);
glCompileShader(composite_pass_fragment);
glGetShaderiv(composite_pass_fragment, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(composite_pass_fragment, sizeof(info_log), nullptr, info_log);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Composite pass fragment shader: %s", info_log);
}
// Link the composite pass program from the fullscreen vertex shader and the
// composite fragment shader. `success` receives the link status.
gl.composite_pass_program = glCreateProgram();
glAttachShader(gl.composite_pass_program, vertex);
glAttachShader(gl.composite_pass_program, composite_pass_fragment);
glLinkProgram(gl.composite_pass_program);
glGetProgramiv(gl.composite_pass_program, GL_LINK_STATUS, &success);
if (!success)
{
    // Fetch the log of the program that just failed to link; reading the
    // AZ pass program's log here would report an unrelated (or empty) message.
    glGetProgramInfoLog(gl.composite_pass_program, sizeof(info_log), nullptr, info_log);
    SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Composite pass program: %s", info_log);
}
glDeleteShader(vertex);
glDeleteShader(composite_pass_fragment);
if (!success)
{
throw application_error("Failed to compile and link shader program");
}
glUseProgram(gl.program);
int width, height;
SDL_GetWindowSizeInPixels(sdl.window, &width, &height);
glViewport(0, 0, width, height);
glProgramUniform3f(gl.program, 1, width, height, 8);
glProgramUniform3f(gl.az_pass_program, 1, width, height, 8);
glGenBuffers(1, &gl.tile_buffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, gl.tile_buffer);
@ -466,48 +519,48 @@ private:
gl.color_info_map = (sprstk_palette*)glMapNamedBufferRange(gl.color_buffer, 0, sizeof(sprstk_palette) * (1 << 5), GL_MAP_WRITE_BIT | GL_MAP_COHERENT_BIT);
const float arr[4] = {1, 0, 0, 1};
glProgramUniformMatrix2fv(gl.program, 3, 1, false, arr);
glProgramUniform1f(gl.program, 2, 1);
glProgramUniformMatrix2fv(gl.az_pass_program, 3, 1, false, arr);
glProgramUniform1f(gl.az_pass_program, 2, 1);
create_buffers();
update_buffers();
}
void destroy_gl()
{
glDeleteTextures(1, &gl.a_buffer);
glDeleteTextures(1, &gl.z_buffer);
glDeleteProgram(gl.program);
glDeleteTextures(1, &gl.az_buffer);
glDeleteTextures(1, &gl.az_next_buffer);
glDeleteProgram(gl.az_pass_program);
glDeleteProgram(gl.composite_pass_program);
glDeleteBuffers(1, &gl.color_buffer);
glDeleteBuffers(1, &gl.tile_buffer);
SDL_GL_DestroyContext(sdl.context);
}
void create_buffers()
void update_buffers()
{
if (gl.a_buffer)
if (gl.az_buffer)
{
glDeleteTextures(1, &gl.a_buffer);
glDeleteTextures(1, &gl.z_buffer);
glDeleteTextures(1, &gl.az_buffer);
glDeleteTextures(1, &gl.az_next_buffer);
}
int width, height;
SDL_GetWindowSizeInPixels(sdl.window, &width, &height);
glGenTextures(1, &gl.a_buffer);
glBindTexture(GL_TEXTURE_3D, gl.a_buffer);
glTexStorage3D(GL_TEXTURE_3D, 1, GL_RGBA8, width, height, OIT_LAYERS);
glGenTextures(1, &gl.z_buffer);
glBindTexture(GL_TEXTURE_3D, gl.z_buffer);
glTexStorage3D(GL_TEXTURE_3D, 1, GL_R8UI, width, height, OIT_LAYERS);
glGenTextures(1, &gl.az_buffer);
glBindTexture(GL_TEXTURE_3D, gl.az_buffer);
glTexStorage3D(GL_TEXTURE_3D, 1, GL_RG32UI, width, height, OIT_LAYERS);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
constexpr uint32_t color = 0;
glClearTexImage(gl.a_buffer, 0, GL_RGBA, GL_UNSIGNED_INT, &color);
glBindImageTexture(0, gl.a_buffer, 0, true, 0, GL_READ_WRITE, GL_RGBA8);
glGenTextures(1, &gl.az_next_buffer);
glBindTexture(GL_TEXTURE_2D, gl.az_next_buffer);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, width, height);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
constexpr uint8_t z = 0xFF;
glClearTexImage(gl.z_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z);
glBindImageTexture(1, gl.z_buffer, 0, true, 0, GL_READ_WRITE, GL_R8UI);
glBindTexture(GL_TEXTURE_3D, gl.a_buffer);
glBindImageTexture(0, gl.az_buffer, 0, true, 0, GL_READ_WRITE, GL_RG32UI);
glBindImageTexture(1, gl.az_next_buffer, 0, false, 0, GL_READ_WRITE, GL_R32UI);
}
};