From cef8124247505aa8ebfa181d30770a1849307d85 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Fri, 19 Jul 2019 01:40:27 +0200 Subject: [PATCH] Replace or with addition to enable usra instruction. --- client/TracyDxt1.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/client/TracyDxt1.cpp b/client/TracyDxt1.cpp index a5a29d6d..da565ad5 100644 --- a/client/TracyDxt1.cpp +++ b/client/TracyDxt1.cpp @@ -355,9 +355,9 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src ) uint8x8_t p01 = vmovn_u16( m1 ); uint8x16_t p0 = vcombine_u8( p00, p01 ); - uint32x4_t p1 = vorrq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) ); - uint32x4_t p2 = vorrq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) ); - uint32x4_t p3 = vorrq_u32( p1, p2 ); + uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) ); + uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) ); + uint32x4_t p3 = vaddq_u32( p1, p2 ); uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) ); uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) ); @@ -469,9 +469,9 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src ) uint8x8_t p01 = vmovn_u16( m1 ); uint8x16_t p0 = vcombine_u8( p00, p01 ); - uint32x4_t p1 = vorrq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) ); - uint32x4_t p2 = vorrq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) ); - uint32x4_t p3 = vorrq_u32( p1, p2 ); + uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) ); + uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) ); + uint32x4_t p3 = vaddq_u32( p1, p2 ); uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) ); uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );