diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 09ef097..81bf4b3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,7 +1,8 @@ cmake_minimum_required(VERSION 3.21) function(add_example name) - add_executable("example-${name}" ${name}.cpp) + add_executable("example-${name}" ${name}.cpp simplexnoise1234.cpp) + target_include_directories("example-${name}" PUBLIC .) target_link_libraries("example-${name}" PUBLIC sprstk) endfunction() diff --git a/examples/basic.cpp b/examples/basic.cpp index 2fab33b..7c49342 100644 --- a/examples/basic.cpp +++ b/examples/basic.cpp @@ -1,39 +1,114 @@ #include -#include +#include + +#include namespace { +constexpr int SIZE = 256; + +double octaves(SimplexNoise1234& simplex, double x, double y, int layers, double persistence, double frequency) +{ + double ampl = 1; + double maxval = 0; + double val = 0; + + for (int i = 0; i < layers; i++) + { + val += simplex.noise(x * frequency, y * frequency) * ampl; + + maxval += ampl; + + ampl *= persistence; + frequency *= 2; + } + + return val / maxval; +} + +uint8_t data[SIZE * SIZE]; + +double ease(double n) +{ + return pow(n, 1.5); +} + +uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) +{ + return (r << 24) | (g << 16) | (b << 8) | a; +} + +int pick_pal(uint8_t height) +{ + if (height > 20) { return 0; } + if (height > 8) { return 1; } + return 2; +} + void init(sprstk* instance, void* userdata) { sprstk_palette pal = {}; - for (int i = 0; i < 28; i++) + for (int i = 0; i < 24; i++) { - pal.colors[i] = 0x7F3F0040; + uint8_t val = 0x33 / (12 - i / 2.0f) + 0x33; + pal.colors[i] = color(val, val, val, 0x7F); } - for (int i = 28; i < 32; i++) + for (int i = 24; i < 32; i++) { - pal.colors[i] = 0x00FF0040; + uint8_t val = 0x55 / (16 - i / 2.0f) + 0xAA; + pal.colors[i] = color(val, val, val, 0x7F); } sprstk_set_palette(instance, 0, &pal); + + for (int i = 0; i < 16; i++) + { + pal.colors[i] = color(0x70 / (5.0f - i / 4.0f), 0x35 / (5.0f - i / 4.0f), 0, 0x7F); + } + + for (int i = 16; i < 32; i++) + { + pal.colors[i] = color(0x05 / (8.5f - i / 2.5f) + 0x15, 0x40 / (8.5f - i / 2.5f) + 0x0, 0, 0x7F); + } + + sprstk_set_palette(instance, 1, &pal); + + for (int i = 0; i < 8; i++) + { + pal.colors[i] = color(0x20 / (8 - i) + 0x20, 0x40 / (8 - i) + 0x40, 0x90 / (8 - i) + 0x40, 0x7F); + } + + sprstk_set_palette(instance, 2, &pal); + + sprstk_set_scale(instance, 0.4f); + + SimplexNoise1234 simplex; + for (int i = 0; i < SIZE; i++) + { + for (int j = 0; j < SIZE; j++) + { + double value = octaves(simplex, i, j, 6, 0.4, 1.0 / 128.0); + data[i + j * SIZE] = 28 * ease((value + 1) / 2) + 3; + } + } + + for (int i = 0; i < SIZE; i++) + { + for (int j = 0; j < SIZE; j++) + { + sprstk_put(instance, i - SIZE / 2, j - SIZE / 2, data[i + SIZE * j], pick_pal(data[i + SIZE * j])); + } + } } void update(sprstk* instance, float dt, float* userdata) { *userdata += dt / 2; sprstk_set_angle(instance, *userdata); - - for (int i = -512; i < 512; i++) - { - for (int j = -512; j < 512; j++) - { - sprstk_put(instance, i, j, 31, 0); - } - } } } diff --git a/examples/simplexnoise1234.cpp b/examples/simplexnoise1234.cpp new file mode 100644 index 0000000..de497c1 --- /dev/null +++ b/examples/simplexnoise1234.cpp @@ -0,0 +1,470 @@ +// SimplexNoise1234 +// Copyright © 2003-2011, Stefan Gustavson +// +// Contact: stegu@itn.liu.se +// +// This library is public domain software, released by the author +// into the public domain in February 2011. You may do anything +// you like with it. You may even remove all attributions, +// but of course I'd appreciate it if you kept my name somewhere. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Modified by the LOVE Development Team to use double precision. + +/** \file + \brief Implements the SimplexNoise1234 class for producing Perlin simplex noise. + \author Stefan Gustavson (stegu@itn.liu.se) +*/ + +/* + * This implementation is "Simplex Noise" as presented by + * Ken Perlin at a relatively obscure and not often cited course + * session "Real-Time Shading" at Siggraph 2001 (before real + * time shading actually took on), under the title "hardware noise". + * The 3D function is numerically equivalent to his Java reference + * code available in the PDF course notes, although I re-implemented + * it from scratch to get more readable code. The 1D, 2D and 4D cases + * were implemented from scratch by me from Ken Perlin's text. + * + * This is a highly reusable class. It has no dependencies + * on any other file, apart from its own header file. + */ + + +#include "simplexnoise1234.h" + +#define FASTFLOOR(x) ( ((x)>0) ? ((int)x) : (((int)x)-1) ) + +//--------------------------------------------------------------------- +// Static data + +/* + * Permutation table. This is just a random jumble of all numbers 0-255, + * repeated twice to avoid wrapping the index at 255 for each lookup. + * This needs to be exactly the same for all instances on all platforms, + * so it's easiest to just keep it as static explicit data. + * This also removes the need for any initialisation of this class. + * + * Note that making this an int[] instead of a char[] might make the + * code run faster on platforms with a high penalty for unaligned single + * byte addressing. Intel x86 is generally single-byte-friendly, but + * some other CPUs are faster with 4-aligned reads. + * However, a char[] is smaller, which avoids cache trashing, and that + * is probably the most important aspect on most architectures. + * This array is accessed a *lot* by the noise functions. + * A vector-valued noise over 3D accesses it 96 times, and a + * float-valued 4D noise 64 times. We want this to fit in the cache! + */ +unsigned char SimplexNoise1234::perm[512] = {151,160,137,91,90,15, + 131,13,201,95,96,53,194,233,7,225,140,36,103,30,69,142,8,99,37,240,21,10,23, + 190, 6,148,247,120,234,75,0,26,197,62,94,252,219,203,117,35,11,32,57,177,33, + 88,237,149,56,87,174,20,125,136,171,168, 68,175,74,165,71,134,139,48,27,166, + 77,146,158,231,83,111,229,122,60,211,133,230,220,105,92,41,55,46,245,40,244, + 102,143,54, 65,25,63,161, 1,216,80,73,209,76,132,187,208, 89,18,169,200,196, + 135,130,116,188,159,86,164,100,109,198,173,186, 3,64,52,217,226,250,124,123, + 5,202,38,147,118,126,255,82,85,212,207,206,59,227,47,16,58,17,182,189,28,42, + 223,183,170,213,119,248,152, 2,44,154,163, 70,221,153,101,155,167, 43,172,9, + 129,22,39,253, 19,98,108,110,79,113,224,232,178,185, 112,104,218,246,97,228, + 251,34,242,193,238,210,144,12,191,179,162,241, 81,51,145,235,249,14,239,107, + 49,192,214, 31,181,199,106,157,184, 84,204,176,115,121,50,45,127, 4,150,254, + 138,236,205,93,222,114,67,29,24,72,243,141,128,195,78,66,215,61,156,180, + 151,160,137,91,90,15, + 131,13,201,95,96,53,194,233,7,225,140,36,103,30,69,142,8,99,37,240,21,10,23, + 190, 6,148,247,120,234,75,0,26,197,62,94,252,219,203,117,35,11,32,57,177,33, + 88,237,149,56,87,174,20,125,136,171,168, 68,175,74,165,71,134,139,48,27,166, + 77,146,158,231,83,111,229,122,60,211,133,230,220,105,92,41,55,46,245,40,244, + 102,143,54, 65,25,63,161, 1,216,80,73,209,76,132,187,208, 89,18,169,200,196, + 135,130,116,188,159,86,164,100,109,198,173,186, 3,64,52,217,226,250,124,123, + 5,202,38,147,118,126,255,82,85,212,207,206,59,227,47,16,58,17,182,189,28,42, + 223,183,170,213,119,248,152, 2,44,154,163, 70,221,153,101,155,167, 43,172,9, + 129,22,39,253, 19,98,108,110,79,113,224,232,178,185, 112,104,218,246,97,228, + 251,34,242,193,238,210,144,12,191,179,162,241, 81,51,145,235,249,14,239,107, + 49,192,214, 31,181,199,106,157,184, 84,204,176,115,121,50,45,127, 4,150,254, + 138,236,205,93,222,114,67,29,24,72,243,141,128,195,78,66,215,61,156,180 +}; + +//--------------------------------------------------------------------- + +/* + * Helper functions to compute gradients-dot-residualvectors (1D to 4D) + * Note that these generate gradients of more than unit length. To make + * a close match with the value range of classic Perlin noise, the final + * noise values need to be rescaled to fit nicely within [-1,1]. + * (The simplex noise functions as such also have different scaling.) + * Note also that these noise functions are the most practical and useful + * signed version of Perlin noise. To return values according to the + * RenderMan specification from the SL noise() and pnoise() functions, + * the noise values need to be scaled and offset to [0,1], like this: + * float SLnoise = (SimplexNoise1234::noise(x,y,z) + 1.0) * 0.5; + */ + +double SimplexNoise1234::grad( int hash, double x ) { + int h = hash & 15; + double grad = 1.0 + (h & 7); // Gradient value 1.0, 2.0, ..., 8.0 + if (h&8) grad = -grad; // Set a random sign for the gradient + return ( grad * x ); // Multiply the gradient with the distance +} + +double SimplexNoise1234::grad( int hash, double x, double y ) { + int h = hash & 7; // Convert low 3 bits of hash code + double u = h<4 ? x : y; // into 8 simple gradient directions, + double v = h<4 ? y : x; // and compute the dot product with (x,y). + return ((h&1)? -u : u) + ((h&2)? -2.0*v : 2.0*v); +} + +double SimplexNoise1234::grad( int hash, double x, double y , double z ) { + int h = hash & 15; // Convert low 4 bits of hash code into 12 simple + double u = h<8 ? x : y; // gradient directions, and compute dot product. + double v = h<4 ? y : h==12||h==14 ? x : z; // Fix repeats at h = 12 to 15 + return ((h&1)? -u : u) + ((h&2)? -v : v); +} + +double SimplexNoise1234::grad( int hash, double x, double y, double z, double t ) { + int h = hash & 31; // Convert low 5 bits of hash code into 32 simple + double u = h<24 ? x : y; // gradient directions, and compute dot product. + double v = h<16 ? y : z; + double w = h<8 ? z : t; + return ((h&1)? -u : u) + ((h&2)? -v : v) + ((h&4)? -w : w); +} + +// A lookup table to traverse the simplex around a given point in 4D. +// Details can be found where this table is used, in the 4D noise method. +/* TODO: This should not be required, backport it from Bill's GLSL code! */ +static unsigned char simplex[64][4] = { + {0,1,2,3},{0,1,3,2},{0,0,0,0},{0,2,3,1},{0,0,0,0},{0,0,0,0},{0,0,0,0},{1,2,3,0}, + {0,2,1,3},{0,0,0,0},{0,3,1,2},{0,3,2,1},{0,0,0,0},{0,0,0,0},{0,0,0,0},{1,3,2,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {1,2,0,3},{0,0,0,0},{1,3,0,2},{0,0,0,0},{0,0,0,0},{0,0,0,0},{2,3,0,1},{2,3,1,0}, + {1,0,2,3},{1,0,3,2},{0,0,0,0},{0,0,0,0},{0,0,0,0},{2,0,3,1},{0,0,0,0},{2,1,3,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,1,3},{0,0,0,0},{0,0,0,0},{0,0,0,0},{3,0,1,2},{3,0,2,1},{0,0,0,0},{3,1,2,0}, + {2,1,0,3},{0,0,0,0},{0,0,0,0},{0,0,0,0},{3,1,0,2},{0,0,0,0},{3,2,0,1},{3,2,1,0}}; + +// 1D simplex noise +double SimplexNoise1234::noise(double x) { + + int i0 = FASTFLOOR(x); + int i1 = i0 + 1; + double x0 = x - i0; + double x1 = x0 - 1.0; + + double n0, n1; + + double t0 = 1.0 - x0*x0; + t0 *= t0; + n0 = t0 * t0 * grad(perm[i0 & 0xff], x0); + + double t1 = 1.0 - x1*x1; + t1 *= t1; + n1 = t1 * t1 * grad(perm[i1 & 0xff], x1); + // The maximum value of this noise is 8*(3/4)^4 = 2.53125 + // A factor of 0.395 will scale to fit exactly within [-1,1] + return 0.395 * (n0 + n1); + +} + +// 2D simplex noise +double SimplexNoise1234::noise(double x, double y) { + +#define F2 0.366025403 // F2 = 0.5*(sqrt(3.0)-1.0) +#define G2 0.211324865 // G2 = (3.0-Math.sqrt(3.0))/6.0 + + double n0, n1, n2; // Noise contributions from the three corners + + // Skew the input space to determine which simplex cell we're in + double s = (x+y)*F2; // Hairy factor for 2D + double xs = x + s; + double ys = y + s; + int i = FASTFLOOR(xs); + int j = FASTFLOOR(ys); + + double t = (i+j)*G2; + double X0 = i-t; // Unskew the cell origin back to (x,y) space + double Y0 = j-t; + double x0 = x-X0; // The x,y distances from the cell origin + double y0 = y-Y0; + + // For the 2D case, the simplex shape is an equilateral triangle. + // Determine which simplex we are in. + int i1, j1; // Offsets for second (middle) corner of simplex in (i,j) coords + if(x0>y0) {i1=1; j1=0;} // lower triangle, XY order: (0,0)->(1,0)->(1,1) + else {i1=0; j1=1;} // upper triangle, YX order: (0,0)->(0,1)->(1,1) + + // A step of (1,0) in (i,j) means a step of (1-c,-c) in (x,y), and + // a step of (0,1) in (i,j) means a step of (-c,1-c) in (x,y), where + // c = (3-sqrt(3))/6 + + double x1 = x0 - i1 + G2; // Offsets for middle corner in (x,y) unskewed coords + double y1 = y0 - j1 + G2; + double x2 = x0 - 1.0 + 2.0 * G2; // Offsets for last corner in (x,y) unskewed coords + double y2 = y0 - 1.0 + 2.0 * G2; + + // Wrap the integer indices at 256, to avoid indexing perm[] out of bounds + int ii = i & 0xff; + int jj = j & 0xff; + + // Calculate the contribution from the three corners + double t0 = 0.5 - x0*x0-y0*y0; + if(t0 < 0.0) n0 = 0.0; + else { + t0 *= t0; + n0 = t0 * t0 * grad(perm[ii+perm[jj]], x0, y0); + } + + double t1 = 0.5 - x1*x1-y1*y1; + if(t1 < 0.0) n1 = 0.0; + else { + t1 *= t1; + n1 = t1 * t1 * grad(perm[ii+i1+perm[jj+j1]], x1, y1); + } + + double t2 = 0.5 - x2*x2-y2*y2; + if(t2 < 0.0) n2 = 0.0; + else { + t2 *= t2; + n2 = t2 * t2 * grad(perm[ii+1+perm[jj+1]], x2, y2); + } + + // Add contributions from each corner to get the final noise value. + // The result is scaled to return values in the interval [-1,1]. + return 45.23 * (n0 + n1 + n2); // TODO: The scale factor is preliminary! + } + +// 3D simplex noise +double SimplexNoise1234::noise(double x, double y, double z) { + + // Simple skewing factors for the 3D case +#define F3 0.333333333 +#define G3 0.166666667 + + double n0, n1, n2, n3; // Noise contributions from the four corners + + // Skew the input space to determine which simplex cell we're in + double s = (x+y+z)*F3; // Very nice and simple skew factor for 3D + double xs = x+s; + double ys = y+s; + double zs = z+s; + int i = FASTFLOOR(xs); + int j = FASTFLOOR(ys); + int k = FASTFLOOR(zs); + + double t = (float)(i+j+k)*G3; + double X0 = i-t; // Unskew the cell origin back to (x,y,z) space + double Y0 = j-t; + double Z0 = k-t; + double x0 = x-X0; // The x,y,z distances from the cell origin + double y0 = y-Y0; + double z0 = z-Z0; + + // For the 3D case, the simplex shape is a slightly irregular tetrahedron. + // Determine which simplex we are in. + int i1, j1, k1; // Offsets for second corner of simplex in (i,j,k) coords + int i2, j2, k2; // Offsets for third corner of simplex in (i,j,k) coords + + /* This code would benefit from a backport from the GLSL version! */ + if(x0>=y0) { + if(y0>=z0) + { i1=1; j1=0; k1=0; i2=1; j2=1; k2=0; } // X Y Z order + else if(x0>=z0) { i1=1; j1=0; k1=0; i2=1; j2=0; k2=1; } // X Z Y order + else { i1=0; j1=0; k1=1; i2=1; j2=0; k2=1; } // Z X Y order + } + else { // x0 y0) ? 32 : 0; + int c2 = (x0 > z0) ? 16 : 0; + int c3 = (y0 > z0) ? 8 : 0; + int c4 = (x0 > w0) ? 4 : 0; + int c5 = (y0 > w0) ? 2 : 0; + int c6 = (z0 > w0) ? 1 : 0; + int c = c1 + c2 + c3 + c4 + c5 + c6; + + int i1, j1, k1, l1; // The integer offsets for the second simplex corner + int i2, j2, k2, l2; // The integer offsets for the third simplex corner + int i3, j3, k3, l3; // The integer offsets for the fourth simplex corner + + // simplex[c] is a 4-vector with the numbers 0, 1, 2 and 3 in some order. + // Many values of c will never occur, since e.g. x>y>z>w makes x=3 ? 1 : 0; + j1 = simplex[c][1]>=3 ? 1 : 0; + k1 = simplex[c][2]>=3 ? 1 : 0; + l1 = simplex[c][3]>=3 ? 1 : 0; + // The number 2 in the "simplex" array is at the second largest coordinate. + i2 = simplex[c][0]>=2 ? 1 : 0; + j2 = simplex[c][1]>=2 ? 1 : 0; + k2 = simplex[c][2]>=2 ? 1 : 0; + l2 = simplex[c][3]>=2 ? 1 : 0; + // The number 1 in the "simplex" array is at the second smallest coordinate. + i3 = simplex[c][0]>=1 ? 1 : 0; + j3 = simplex[c][1]>=1 ? 1 : 0; + k3 = simplex[c][2]>=1 ? 1 : 0; + l3 = simplex[c][3]>=1 ? 1 : 0; + // The fifth corner has all coordinate offsets = 1, so no need to look that up. + + double x1 = x0 - i1 + G4; // Offsets for second corner in (x,y,z,w) coords + double y1 = y0 - j1 + G4; + double z1 = z0 - k1 + G4; + double w1 = w0 - l1 + G4; + double x2 = x0 - i2 + 2.0f*G4; // Offsets for third corner in (x,y,z,w) coords + double y2 = y0 - j2 + 2.0f*G4; + double z2 = z0 - k2 + 2.0f*G4; + double w2 = w0 - l2 + 2.0f*G4; + double x3 = x0 - i3 + 3.0f*G4; // Offsets for fourth corner in (x,y,z,w) coords + double y3 = y0 - j3 + 3.0f*G4; + double z3 = z0 - k3 + 3.0f*G4; + double w3 = w0 - l3 + 3.0f*G4; + double x4 = x0 - 1.0f + 4.0f*G4; // Offsets for last corner in (x,y,z,w) coords + double y4 = y0 - 1.0f + 4.0f*G4; + double z4 = z0 - 1.0f + 4.0f*G4; + double w4 = w0 - 1.0f + 4.0f*G4; + + // Wrap the integer indices at 256, to avoid indexing perm[] out of bounds + int ii = i & 0xff; + int jj = j & 0xff; + int kk = k & 0xff; + int ll = l & 0xff; + + // Calculate the contribution from the five corners + double t0 = 0.6f - x0*x0 - y0*y0 - z0*z0 - w0*w0; + if(t0 < 0.0f) n0 = 0.0f; + else { + t0 *= t0; + n0 = t0 * t0 * grad(perm[ii+perm[jj+perm[kk+perm[ll]]]], x0, y0, z0, w0); + } + + double t1 = 0.6f - x1*x1 - y1*y1 - z1*z1 - w1*w1; + if(t1 < 0.0f) n1 = 0.0f; + else { + t1 *= t1; + n1 = t1 * t1 * grad(perm[ii+i1+perm[jj+j1+perm[kk+k1+perm[ll+l1]]]], x1, y1, z1, w1); + } + + double t2 = 0.6f - x2*x2 - y2*y2 - z2*z2 - w2*w2; + if(t2 < 0.0f) n2 = 0.0f; + else { + t2 *= t2; + n2 = t2 * t2 * grad(perm[ii+i2+perm[jj+j2+perm[kk+k2+perm[ll+l2]]]], x2, y2, z2, w2); + } + + double t3 = 0.6f - x3*x3 - y3*y3 - z3*z3 - w3*w3; + if(t3 < 0.0f) n3 = 0.0f; + else { + t3 *= t3; + n3 = t3 * t3 * grad(perm[ii+i3+perm[jj+j3+perm[kk+k3+perm[ll+l3]]]], x3, y3, z3, w3); + } + + double t4 = 0.6f - x4*x4 - y4*y4 - z4*z4 - w4*w4; + if(t4 < 0.0f) n4 = 0.0f; + else { + t4 *= t4; + n4 = t4 * t4 * grad(perm[ii+1+perm[jj+1+perm[kk+1+perm[ll+1]]]], x4, y4, z4, w4); + } + + // Sum up and scale the result to cover the range [-1,1] + return 27.3 * (n0 + n1 + n2 + n3 + n4); // TODO: The scale factor is preliminary! +} +//--------------------------------------------------------------------- diff --git a/examples/simplexnoise1234.h b/examples/simplexnoise1234.h new file mode 100644 index 0000000..dc49a81 --- /dev/null +++ b/examples/simplexnoise1234.h @@ -0,0 +1,48 @@ +// SimplexNoise1234 +// Copyright © 2003-2011, Stefan Gustavson +// +// Contact: stegu@itn.liu.se +// +// This library is public domain software, released by the author +// into the public domain in February 2011. You may do anything +// you like with it. You may even remove all attributions, +// but of course I'd appreciate it if you kept my name somewhere. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Modified by the LOVE Development Team to use double precision. + +/** \file + \brief Declares the SimplexNoise1234 class for producing Perlin simplex noise. + \author Stefan Gustavson (stegu@itn.liu.se) +*/ + +/* + * This is a clean, fast, modern and free Perlin Simplex noise class in C++. + * Being a stand-alone class with no external dependencies, it is + * highly reusable without source code modifications. + */ + +class SimplexNoise1234 { + + public: + SimplexNoise1234() {} + ~SimplexNoise1234() {} + +/** 1D and 2D float Perlin noise + */ + static double noise( double x ); + static double noise( double x, double y ); + static double noise( double x, double y, double z ); + static double noise( double x, double y, double z, double w); + + private: + static unsigned char perm[]; + static double grad( int hash, double x ); + static double grad( int hash, double x, double y ); + static double grad( int hash, double x, double y, double z ); + static double grad( int hash, double x, double y, double z, double t ); +}; diff --git a/include/sprstk/sprstk.h b/include/sprstk/sprstk.h index 8784c6e..1bc83fd 100644 --- a/include/sprstk/sprstk.h +++ b/include/sprstk/sprstk.h @@ -30,11 +30,14 @@ void sprstk_del(sprstk* instance); void sprstk_run(sprstk* instance); void sprstk_stop(sprstk* instance); +void sprstk_clear(sprstk* instance); + void sprstk_put(sprstk* instance, int x, int y, unsigned int layers, unsigned int palette_lookup); void sprstk_putz(sprstk* instance, int x, int y, unsigned int layers, unsigned int palette_lookup, unsigned int z_offset); void sprstk_set_palette(sprstk* instance, unsigned int index, const sprstk_palette* palette); +void sprstk_set_scale(sprstk* instance, float scale); void sprstk_set_angle(sprstk* instance, float angle); #ifdef __cplusplus diff --git a/src/sprstk.cpp b/src/sprstk.cpp index b45151e..301f1d8 100644 --- a/src/sprstk.cpp +++ b/src/sprstk.cpp @@ -7,6 +7,10 @@ #include +#define OIT_LAYERS 1 +#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) + const char* MESH_SHADER_CODE = R"( #version 460 @@ -17,11 +21,13 @@ layout (triangles, max_vertices = 128, max_primitives = 64) out; layout (location = 0) out PerVertexData { + flat uint layer; vec4 color; } v_out[]; layout (location = 1) uniform vec3 screen_size_and_pixel_scale; -layout (location = 2) uniform mat2 rotation_matrix; +layout (location = 2) uniform float scale; +layout (location = 3) uniform mat2 rotation_matrix; struct TileInfo { @@ -54,31 +60,37 @@ void main() uint layer_count = bitfieldExtract(t_info.position, 20, 5); - vec2 positions[4] = { vec2(0, 0), vec2(1, 0), vec2(0, 1), vec2(1, 1) }; + float minsize = min(screen_size_and_pixel_scale.x, screen_size_and_pixel_scale.y); + + vec2 positions[4] = { vec2(-0.5, -0.5), vec2(0.5, -0.5), vec2(-0.5, 0.5), vec2(0.5, 0.5) }; for (uint i = 0; i < 4; i++) { positions[i] += stack_position; positions[i] *= screen_size_and_pixel_scale.zz; - positions[i] /= vec2(min(screen_size_and_pixel_scale.x, screen_size_and_pixel_scale.y)); } uint z_offset = bitfieldExtract(t_info.position, 25, 2); uint palette_lookup = bitfieldExtract(t_info.position, 27, 5); ColorInfo c_info = color_infos[palette_lookup]; - float a = bitfieldExtract(c_info.color[gl_LocalInvocationID.x], 0, 8); - float b = bitfieldExtract(c_info.color[gl_LocalInvocationID.x], 8, 8); - float g = bitfieldExtract(c_info.color[gl_LocalInvocationID.x], 16, 8); - float r = bitfieldExtract(c_info.color[gl_LocalInvocationID.x], 24, 8); + uint c = c_info.color[gl_LocalInvocationID.x]; + float a = bitfieldExtract(c, 0, 8); + float b = bitfieldExtract(c, 8, 8); + float g = bitfieldExtract(c, 16, 8); + float r = bitfieldExtract(c, 24, 8); + vec4 color = vec4(r, g, b, a) / vec4(256); for (uint i = 4 * gl_LocalInvocationID.x; i < 4 * gl_LocalInvocationID.x + 4; i++) { vec4 position = vec4(rotation_matrix * positions[i % 4], float(4 * gl_LocalInvocationID.x + z_offset) / 128, 1); - position.y += 24 * position.z / screen_size_and_pixel_scale.z; - position.xy *= vec2(0.05); + position.xy /= screen_size_and_pixel_scale.xy; + position.xy *= scale; + position.y += 16 * gl_LocalInvocationID.x * scale / screen_size_and_pixel_scale.y; gl_MeshVerticesNV[i].gl_Position = position; - v_out[i].color = vec4(r, g, b, a) / vec4(256, 256, 256, 256); + v_out[i].layer = 4 * gl_LocalInvocationID.x + z_offset; + + v_out[i].color = color; } for (uint i = 6 * gl_LocalInvocationID.x; i < 6 * gl_LocalInvocationID.x + 6; i++) @@ -93,16 +105,75 @@ void main() const char* FRAGMENT_SHADER_CODE = R"( #version 460 -layout(location = 0) out vec4 FragColor; +#extension GL_NV_fragment_shader_interlock : enable +#extension GL_ARB_fragment_shader_interlock : enable + +#if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock + +layout (pixel_interlock_unordered) in; + +#if GL_NV_fragment_shader_interlock +#define beginInvocationInterlock beginInvocationInterlockNV +#define endInvocationInterlock endInvocationInterlockNV +#else // GL_NV_fragment_shader_interlock +#define beginInvocationInterlock beginInvocationInterlockARB +#define endInvocationInterlock endInvocationInterlockARB +#endif // GL_NV_fragment_shader_interlock + +#else // #if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock + +#pragma error "fragment shader interlock is required" + +#endif // #if GL_NV_fragment_shader_interlock || GL_ARB_fragment_shader_interlock + +layout (location = 0) out vec4 FragColor; + +layout (binding = 0, rgba8) uniform restrict coherent image3D ABuffer; +layout (binding = 1, r8ui) uniform restrict coherent uimage3D ZBuffer; in PerVertexData { + flat uint layer; vec4 color; } fragIn; void main() { - FragColor = fragIn.color; + vec4 color = fragIn.color; + uint z_current = fragIn.layer; + + beginInvocationInterlock(); + for (int i = 0; i < )" STRINGIFY(OIT_LAYERS) R"(; i++) + { + const ivec3 image_coord = ivec3(gl_FragCoord.xy, i); + + const uint z_from_buffer = imageLoad(ZBuffer, image_coord).x; + if (z_current < z_from_buffer) + { + imageStore(ZBuffer, image_coord, uvec4(z_current, 0, 0, 0)); + const vec4 temp_color = imageLoad(ABuffer, image_coord); + imageStore(ABuffer, image_coord, color); + + z_current = z_from_buffer; + color = temp_color; + + if (z_current == 0xFF) { break; } + } + } + endInvocationInterlock(); + + memoryBarrierImage(); + + color = vec4(0); + for (int i = 0; i < )" STRINGIFY(OIT_LAYERS) R"(; i++) + { + const ivec3 image_coord = ivec3(gl_FragCoord.xy, i); + + const vec4 temp_color = imageLoad(ABuffer, image_coord); + color = vec4(temp_color.rgb * temp_color.a + color.rgb * (1 - temp_color.a), temp_color.a); + } + + FragColor = color; } )"; @@ -124,7 +195,9 @@ public: callbacks(callbacks), userdata(userdata), should_stop(false), - prev_ticks(0) + prev_ticks(0), + prev_resized_ticks(0), + resized(false) { if (!callbacks.update) { @@ -158,10 +231,8 @@ public: if (e.type == SDL_EVENT_WINDOW_RESIZED) { - int width, height; - SDL_GetWindowSizeInPixels(sdl.window, &width, &height); - glViewport(0, 0, width, height); - glProgramUniform3f(gl.program, 1, width, height, 8); + prev_resized_ticks = prev_ticks; + resized = true; } } @@ -169,11 +240,30 @@ public: float dt = (current_ticks - prev_ticks) / 1000.0f; prev_ticks = current_ticks; - gl.tile_count = 0; + if (resized && (current_ticks - prev_resized_ticks) / 1000.0f > 0.5f) + { + prev_resized_ticks = 0; + resized = false; + + int width, height; + SDL_GetWindowSizeInPixels(sdl.window, &width, &height); + glViewport(0, 0, width, height); + glProgramUniform3f(gl.program, 1, width, height, 8); + + create_buffers(); + } + callbacks.update(this, dt, userdata); glClearColor(0, 0, 0, 1); glClear(GL_COLOR_BUFFER_BIT); + + constexpr uint32_t color = 0; + glClearTexImage(gl.a_buffer, 0, GL_RGBA, GL_UNSIGNED_INT, &color); + + constexpr uint8_t z = 0xFF; + glClearTexImage(gl.z_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z); + int i; for (i = 0; i < gl.tile_count; i += 65535) { @@ -191,6 +281,11 @@ public: should_stop = true; } + void clear() + { + gl.tile_count = 0; + } + void put(int x, int y, unsigned int layers, unsigned int palette_lookup, unsigned int z_offset = 0) { x += 512; @@ -219,13 +314,18 @@ public: gl.color_info_map[index] = *palette; } + void set_scale(float scale) + { + glProgramUniform1f(gl.program, 2, scale); + } + void set_angle(float angle) { const float arr[4] = { cosf(angle), -sinf(angle), sinf(angle), cosf(angle) }; - glProgramUniformMatrix2fv(gl.program, 2, 1, false, arr); + glProgramUniformMatrix2fv(gl.program, 3, 1, false, arr); } private: @@ -234,6 +334,8 @@ private: bool should_stop; uint64_t prev_ticks; + uint64_t prev_resized_ticks; + bool resized; struct { @@ -248,6 +350,8 @@ private: unsigned int tile_count; unsigned int color_buffer; sprstk_palette* color_info_map; + unsigned int a_buffer; + unsigned int z_buffer; } gl; void init_sdl() @@ -262,7 +366,7 @@ private: SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); + //SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); sdl.window = SDL_CreateWindow("sprstk", 640, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE); if (!sdl.window) @@ -294,9 +398,6 @@ private: throw application_error("Mesh shaders not supported"); } - glEnable(GL_BLEND); - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO); - glDebugMessageCallback([](GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message, const void* userdata) { SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, "%s", message); @@ -365,15 +466,49 @@ private: gl.color_info_map = (sprstk_palette*)glMapNamedBufferRange(gl.color_buffer, 0, sizeof(sprstk_palette) * (1 << 5), GL_MAP_WRITE_BIT | GL_MAP_COHERENT_BIT); const float arr[4] = {1, 0, 0, 1}; - glProgramUniformMatrix2fv(gl.program, 2, 1, false, arr); + glProgramUniformMatrix2fv(gl.program, 3, 1, false, arr); + glProgramUniform1f(gl.program, 2, 1); + + create_buffers(); } void destroy_gl() { + glDeleteTextures(1, &gl.a_buffer); + glDeleteTextures(1, &gl.z_buffer); glDeleteProgram(gl.program); glDeleteBuffers(1, &gl.tile_buffer); SDL_GL_DestroyContext(sdl.context); } + + void create_buffers() + { + if (gl.a_buffer) + { + glDeleteTextures(1, &gl.a_buffer); + glDeleteTextures(1, &gl.z_buffer); + } + + int width, height; + SDL_GetWindowSizeInPixels(sdl.window, &width, &height); + + glGenTextures(1, &gl.a_buffer); + glBindTexture(GL_TEXTURE_3D, gl.a_buffer); + glTexStorage3D(GL_TEXTURE_3D, 1, GL_RGBA8, width, height, OIT_LAYERS); + glGenTextures(1, &gl.z_buffer); + glBindTexture(GL_TEXTURE_3D, gl.z_buffer); + glTexStorage3D(GL_TEXTURE_3D, 1, GL_R8UI, width, height, OIT_LAYERS); + + constexpr uint32_t color = 0; + glClearTexImage(gl.a_buffer, 0, GL_RGBA, GL_UNSIGNED_INT, &color); + glBindImageTexture(0, gl.a_buffer, 0, true, 0, GL_READ_WRITE, GL_RGBA8); + + constexpr uint8_t z = 0xFF; + glClearTexImage(gl.z_buffer, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, &z); + glBindImageTexture(1, gl.z_buffer, 0, true, 0, GL_READ_WRITE, GL_R8UI); + + glBindTexture(GL_TEXTURE_3D, gl.a_buffer); + } }; extern "C" @@ -420,6 +555,11 @@ void sprstk_stop(sprstk* instance) instance->stop(); } +void sprstk_clear(sprstk* instance) +{ + instance->clear(); +} + void sprstk_put(sprstk* instance, int x, int y, unsigned int layers, unsigned int palette_lookup) { instance->put(x, y, layers, palette_lookup); @@ -435,6 +575,11 @@ void sprstk_set_palette(sprstk* instance, unsigned int index, const sprstk_palet instance->set_palette(index, palette); } +void sprstk_set_scale(sprstk* instance, float scale) +{ + instance->set_scale(scale); +} + void sprstk_set_angle(sprstk* instance, float angle) { instance->set_angle(angle);