mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-14 12:04:35 +00:00
0b1eff8b0d
https://github.com/aras-p/ToyPathTracer b076563906169aa2f9e6d7218ef85decf81f8f72
204 lines
6.1 KiB
C++
204 lines
6.1 KiB
C++
#include "Maths.h"
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
|
|
// Advances a 32-bit xorshift generator by one step.
// Applies the shift sequence (<<13, >>17, <<15) to `state`, stores the result
// back into `state` and returns it. A state of zero stays zero.
static uint32_t XorShift32(uint32_t& state)
{
    uint32_t next = state;
    next = next ^ (next << 13);
    next = next ^ (next >> 17);
    next = next ^ (next << 15);
    return state = next;
}
|
|
|
|
float RandomFloat01(uint32_t& state)
|
|
{
|
|
return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
|
|
}
|
|
|
|
// Returns a pseudo-random point with z = 0 whose squared length is below 1,
// found by rejection sampling: candidates are drawn from the square
// [-1, 1) x [-1, 1) until one satisfies dot(p, p) < 1.
// `state` is advanced by two RandomFloat01 draws per candidate (x first, then y).
float3 RandomInUnitDisk(uint32_t& state)
{
    float3 p;
    do
    {
        // Draw each coordinate in its own statement: the evaluation order of
        // arguments within a single call is unspecified, so two RandomFloat01
        // calls inside one argument list could be assigned to x/y differently
        // from one compiler to the next.
        const float x = RandomFloat01(state);
        const float y = RandomFloat01(state);
        p = 2.0 * float3(x, y, 0) - float3(1, 1, 0);
    } while (dot(p, p) >= 1.0);
    return p;
}
|
|
|
|
// Returns a pseudo-random point whose squared length is below 1, found by
// rejection sampling: candidates are drawn from the cube [-1, 1)^3 until one
// satisfies sqLength(p) < 1.
// `state` is advanced by three RandomFloat01 draws per candidate (x, y, then z).
float3 RandomInUnitSphere(uint32_t& state)
{
    float3 p;
    do
    {
        // Draw each coordinate in its own statement: the evaluation order of
        // arguments within a single call is unspecified, so several
        // RandomFloat01 calls inside one argument list could be assigned to
        // x/y/z differently from one compiler to the next.
        const float x = RandomFloat01(state);
        const float y = RandomFloat01(state);
        const float z = RandomFloat01(state);
        p = 2.0 * float3(x, y, z) - float3(1, 1, 1);
    } while (sqLength(p) >= 1.0);
    return p;
}
|
|
|
|
// Returns a pseudo-random vector of the form (r*cos(a), r*sin(a), z) with
// r = sqrt(1 - z^2), i.e. a point at height z on a sphere of radius 1.
// z is drawn from [-1, 1) and a is RandomFloat01 * 2 * kPI.
// Consumes two RandomFloat01 values from `state`: z first, then the angle.
float3 RandomUnitVector(uint32_t& state)
{
    const float height = RandomFloat01(state) * 2.0f - 1.0f;
    const float angle = RandomFloat01(state) * 2.0f * kPI;
    const float ringRadius = sqrtf(1.0f - height * height);
    return float3(ringRadius * cosf(angle), ringRadius * sinf(angle), height);
}
|
|
|
|
|
|
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
|
|
{
|
|
#if DO_HIT_SPHERES_SIMD
|
|
float4 hitT = float4(tMax);
|
|
#if USE_NEON
|
|
int32x4_t id = vdupq_n_s32(-1);
|
|
#else
|
|
__m128i id = _mm_set1_epi32(-1);
|
|
#endif
|
|
|
|
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
|
|
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
|
|
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
|
|
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
|
|
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
|
|
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
|
|
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
|
|
#elif DO_FLOAT3_WITH_SIMD
|
|
float4 rOrigX = splatX(r.orig.m);
|
|
float4 rOrigY = splatY(r.orig.m);
|
|
float4 rOrigZ = splatZ(r.orig.m);
|
|
float4 rDirX = splatX(r.dir.m);
|
|
float4 rDirY = splatY(r.dir.m);
|
|
float4 rDirZ = splatZ(r.dir.m);
|
|
#else
|
|
float4 rOrigX = float4(r.orig.x);
|
|
float4 rOrigY = float4(r.orig.y);
|
|
float4 rOrigZ = float4(r.orig.z);
|
|
float4 rDirX = float4(r.dir.x);
|
|
float4 rDirY = float4(r.dir.y);
|
|
float4 rDirZ = float4(r.dir.z);
|
|
#endif
|
|
float4 tMin4 = float4(tMin);
|
|
#if USE_NEON
|
|
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
|
|
#else
|
|
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
|
|
#endif
|
|
// process 4 spheres at once
|
|
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
|
|
{
|
|
// load data for 4 spheres
|
|
float4 sCenterX = float4(spheres.centerX + i);
|
|
float4 sCenterY = float4(spheres.centerY + i);
|
|
float4 sCenterZ = float4(spheres.centerZ + i);
|
|
float4 sSqRadius = float4(spheres.sqRadius + i);
|
|
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
|
|
float4 coX = sCenterX - rOrigX;
|
|
float4 coY = sCenterY - rOrigY;
|
|
float4 coZ = sCenterZ - rOrigZ;
|
|
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
|
|
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
|
|
float4 discr = nb * nb - c;
|
|
bool4 discrPos = discr > float4(0.0f);
|
|
// if ray hits any of the 4 spheres
|
|
if (any(discrPos))
|
|
{
|
|
float4 discrSq = sqrtf(discr);
|
|
|
|
// ray could hit spheres at t0 & t1
|
|
float4 t0 = nb - discrSq;
|
|
float4 t1 = nb + discrSq;
|
|
|
|
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
|
|
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
|
|
// if hit, take it
|
|
id = select(id, curId, msk);
|
|
hitT = select(hitT, t, msk);
|
|
}
|
|
#if USE_NEON
|
|
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
|
|
#else
|
|
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
|
|
#endif
|
|
}
|
|
// now we have up to 4 hits, find and return closest one
|
|
float minT = hmin(hitT);
|
|
if (minT < tMax) // any actual hits?
|
|
{
|
|
int minMask = mask(hitT == float4(minT));
|
|
if (minMask != 0)
|
|
{
|
|
int id_scalar[4];
|
|
float hitT_scalar[4];
|
|
#if USE_NEON
|
|
vst1q_s32(id_scalar, id);
|
|
vst1q_f32(hitT_scalar, hitT.m);
|
|
#else
|
|
_mm_storeu_si128((__m128i *)id_scalar, id);
|
|
_mm_storeu_ps(hitT_scalar, hitT.m);
|
|
#endif
|
|
|
|
// In general, you would do this with a bit scan (first set/trailing zero count).
|
|
// But who cares, it's only 16 options.
|
|
static const int laneId[16] =
|
|
{
|
|
0, 0, 1, 0, // 00xx
|
|
2, 0, 1, 0, // 01xx
|
|
3, 0, 1, 0, // 10xx
|
|
2, 0, 1, 0, // 11xx
|
|
};
|
|
|
|
int lane = laneId[minMask];
|
|
int hitId = id_scalar[lane];
|
|
float finalHitT = hitT_scalar[lane];
|
|
|
|
outHit.pos = r.pointAt(finalHitT);
|
|
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
|
|
outHit.t = finalHitT;
|
|
return hitId;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
|
|
#else // #if DO_HIT_SPHERES_SIMD
|
|
|
|
float hitT = tMax;
|
|
int id = -1;
|
|
for (int i = 0; i < spheres.count; ++i)
|
|
{
|
|
float coX = spheres.centerX[i] - r.orig.getX();
|
|
float coY = spheres.centerY[i] - r.orig.getY();
|
|
float coZ = spheres.centerZ[i] - r.orig.getZ();
|
|
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
|
|
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
|
|
float discr = nb * nb - c;
|
|
if (discr > 0)
|
|
{
|
|
float discrSq = sqrtf(discr);
|
|
|
|
// Try earlier t
|
|
float t = nb - discrSq;
|
|
if (t <= tMin) // before min, try later t!
|
|
t = nb + discrSq;
|
|
|
|
if (t > tMin && t < hitT)
|
|
{
|
|
id = i;
|
|
hitT = t;
|
|
}
|
|
}
|
|
}
|
|
if (id != -1)
|
|
{
|
|
outHit.pos = r.pointAt(hitT);
|
|
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
|
|
outHit.t = hitT;
|
|
return id;
|
|
}
|
|
else
|
|
return -1;
|
|
#endif // #else of #if DO_HIT_SPHERES_SIMD
|
|
}
|