mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-10 02:31:48 +00:00
Two fewer ANDs in NEON DXT1 compression.
This commit is contained in:
parent
275d992cb1
commit
03189a30b8
@ -369,27 +369,18 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
uint8x8_t p01 = vmovn_u16( m1 );
|
||||
uint8x16_t p0 = vcombine_u8( p00, p01 );
|
||||
|
||||
uint32x4_t mask0 = vdupq_n_u32( 0x00000003 );
|
||||
uint32x4_t mask1 = vdupq_n_u32( 0x00000300 );
|
||||
uint32x4_t mask2 = vdupq_n_u32( 0x00030000 );
|
||||
uint32x4_t mask3 = vdupq_n_u32( 0x03000000 );
|
||||
uint32x4_t p1 = vorrq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
|
||||
uint32x4_t p2 = vorrq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
|
||||
uint32x4_t p3 = vorrq_u32( p1, p2 );
|
||||
uint32x4_t p4 = vandq_u32( p3, vdupq_n_u32( 0xFF ) );
|
||||
|
||||
uint32x4_t p1 = vandq_u32( vreinterpretq_u32_u8( p0 ), mask0 );
|
||||
uint32x4_t p2 = vshrq_n_u32( vandq_u32( vreinterpretq_u32_u8( p0 ), mask1 ), 6 );
|
||||
uint32x4_t p3 = vshrq_n_u32( vandq_u32( vreinterpretq_u32_u8( p0 ), mask2 ), 12 );
|
||||
uint32x4_t p4 = vshrq_n_u32( vandq_u32( vreinterpretq_u32_u8( p0 ), mask3 ), 18 );
|
||||
|
||||
uint32x4_t p5 = vorrq_u32( p1, p2 );
|
||||
uint32x4_t p6 = vorrq_u32( p3, p4 );
|
||||
uint32x4_t p7 = vorrq_u32( p5, p6 );
|
||||
|
||||
uint16x4x2_t p8 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p7 ) ), vget_high_u16( vreinterpretq_u16_u32( p7 ) ) );
|
||||
uint8x8x2_t p9 = vuzp_u8( vreinterpret_u8_u16( p8.val[0] ), vreinterpret_u8_u16( p8.val[0] ) );
|
||||
uint16x4x2_t p5 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p4 ) ), vget_high_u16( vreinterpretq_u16_u32( p4 ) ) );
|
||||
uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p5.val[0] ), vreinterpret_u8_u16( p5.val[0] ) );
|
||||
|
||||
uint32_t vmin, vmax, vp;
|
||||
vst1q_lane_u32( &vmin, vreinterpretq_u32_u8( min ), 0 );
|
||||
vst1q_lane_u32( &vmax, vreinterpretq_u32_u8( max ), 0 );
|
||||
vst1_lane_u32( &vp, vreinterpret_u32_u8( p9.val[0] ), 0 );
|
||||
vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
|
||||
|
||||
uint32_t data = 0;
|
||||
for( int i=0; i<4; i++ )
|
||||
|
Loading…
Reference in New Issue
Block a user