mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-10 02:31:48 +00:00
Combine block data directly in AVX registers.
This commit is contained in:
parent
396c28011e
commit
178dc9eba7
@@ -575,15 +575,9 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
__m256i mm4 = _mm256_or_si256( mm3, mmb );
|
||||
__m256i mm5 = _mm256_shuffle_epi8( mm4, _mm256_set1_epi32( 0x09080100 ) );
|
||||
|
||||
uint32_t minmax0 = _mm256_cvtsi256_si32( mm5 );
|
||||
uint32_t minmax1 = _mm256_extract_epi32( mm5, 4 );
|
||||
uint32_t vp0 = _mm256_cvtsi256_si32( p );
|
||||
uint32_t vp1 = _mm256_extract_epi32( p, 4 );
|
||||
|
||||
memcpy( dst, &minmax0, 4 );
|
||||
memcpy( dst+4, &vp0, 4 );
|
||||
memcpy( dst+8, &minmax1, 4 );
|
||||
memcpy( dst+12, &vp1, 4 );
|
||||
__m256i d0 = _mm256_unpacklo_epi32( mm5, p );
|
||||
__m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
|
||||
_mm_storeu_si128( (__m128i*)dst, _mm256_castsi256_si128( d1 ) );
|
||||
dst += 16;
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user