Fix 32-bit NEON version of DXT1 compression.

This reverts commit b32e8fa24e. Apparently it is possible to receive non-uniform data in the alpha channel, which breaks the original assumption that the mask is not needed. This seemed to be a problem only in the 32-bit NEON implementation of DXT1 compression. Other implementations handle such data without degrading the visual output.
2024-11-25 23:44:35 +00:00 · 2019-09-03 21:26:17 +02:00 · 2019-09-03 21:26:17 +02:00 · 37661fd2ee
commit 37661fd2ee
parent aa2530d442
2 changed files with 5 additions and 4 deletions
--- a/client/TracyDxt1.cpp
+++ b/client/TracyDxt1.cpp
@@ -372,10 +372,11 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
        return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
    }
-    uint8x16_t l0 = vreinterpretq_u8_u32( px0 );
+    uint32x4_t mask = vdupq_n_u32( 0xFFFFFF );
-    uint8x16_t l1 = vreinterpretq_u8_u32( px1 );
+    uint8x16_t l0 = vreinterpretq_u8_u32( vandq_u32( mask, px0 ) );
-    uint8x16_t l2 = vreinterpretq_u8_u32( px2 );
+    uint8x16_t l1 = vreinterpretq_u8_u32( vandq_u32( mask, px1 ) );
-    uint8x16_t l3 = vreinterpretq_u8_u32( px3 );
+    uint8x16_t l2 = vreinterpretq_u8_u32( vandq_u32( mask, px2 ) );
+    uint8x16_t l3 = vreinterpretq_u8_u32( vandq_u32( mask, px3 ) );
    uint8x16_t min0 = vminq_u8( l0, l1 );
    uint8x16_t min1 = vminq_u8( l2, l3 );
--- a/doc/issues/dxt1+alpha.png
+++ b/doc/issues/dxt1+alpha.png