[Headers][X86] Allow AVX movddup/movsldup/movshdup intrinsics to be used in constexpr (#152340)

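Matches the existing SSE3 handling. Covers both the 256-bit AVX intrinsics (_mm256_*) and their 512-bit AVX-512 counterparts (_mm512_*).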
This commit is contained in:
Simon Pilgrim 2025-08-07 08:17:31 +01:00 committed by GitHub
parent b83f7f195c
commit 6abf4f376e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 12 additions and 6 deletions

View File

@ -5303,7 +5303,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
(__mmask8) __U);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_movedup_pd (__m512d __A)
{
return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
@ -8665,7 +8665,7 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
_mm512_setzero_si512());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_movehdup_ps (__m512 __A)
{
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
@ -8688,7 +8688,7 @@ _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_moveldup_ps (__m512 __A)
{
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,

View File

@ -2392,7 +2392,7 @@ _mm256_cvtss_f32(__m256 __a)
/// return value.
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
/// values.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movehdup_ps(__m256 __a)
{
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
@ -2417,7 +2417,7 @@ _mm256_movehdup_ps(__m256 __a)
/// return value.
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
/// values.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_moveldup_ps(__m256 __a)
{
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
@ -2439,7 +2439,7 @@ _mm256_moveldup_ps(__m256 __a)
/// the return value.
/// \returns A 256-bit vector of [4 x double] containing the moved and
/// duplicated values.
static __inline __m256d __DEFAULT_FN_ATTRS
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movedup_pd(__m256d __a)
{
return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);

View File

@ -1262,18 +1262,21 @@ __m256d test_mm256_movedup_pd(__m256d A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
return _mm256_movedup_pd(A);
}
// Constant-expression check for _mm256_movedup_pd: the expected result repeats
// the even-indexed input lanes (0 and 2), i.e. {a0, a0, a2, a2}.
// NOTE(review): TEST_CONSTEXPR and match_m256d are helpers defined outside this
// view - presumably they assert the lane-wise comparison at compile time.
TEST_CONSTEXPR(match_m256d(_mm256_movedup_pd((__m256d){+7.0, -7.0, -42.0, +42.0}), +7.0, +7.0, -42.0, -42.0));
// Codegen test for _mm256_movehdup_ps: forwards A to the intrinsic and returns
// the result. The FileCheck directives in the body expect a shufflevector on
// <8 x float> that duplicates the odd-indexed lanes (1, 3, 5, 7).
__m256 test_mm256_movehdup_ps(__m256 A) {
// CHECK-LABEL: test_mm256_movehdup_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
return _mm256_movehdup_ps(A);
}
// Constant-expression check for _mm256_movehdup_ps: the expected result repeats
// each odd-indexed input lane (1, 3, 5, 7) twice.
// NOTE(review): TEST_CONSTEXPR and match_m256 are helpers defined outside this
// view - presumably they assert the lane-wise comparison at compile time.
TEST_CONSTEXPR(match_m256(_mm256_movehdup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), -1.0f, -1.0f, +4.0f, +4.0f, -8.0f, -8.0f, +3.0f, +3.0f));
// Codegen test for _mm256_moveldup_ps: forwards A to the intrinsic and returns
// the result. The FileCheck directives in the body expect a shufflevector on
// <8 x float> that duplicates the even-indexed lanes (0, 2, 4, 6).
__m256 test_mm256_moveldup_ps(__m256 A) {
// CHECK-LABEL: test_mm256_moveldup_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm256_moveldup_ps(A);
}
// Constant-expression check for _mm256_moveldup_ps: the expected result repeats
// each even-indexed input lane (0, 2, 4, 6) twice.
// NOTE(review): TEST_CONSTEXPR and match_m256 are helpers defined outside this
// view - presumably they assert the lane-wise comparison at compile time.
TEST_CONSTEXPR(match_m256(_mm256_moveldup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +8.0f, +8.0f, -3.0f, -3.0f));
int test_mm256_movemask_pd(__m256d A) {
// CHECK-LABEL: test_mm256_movemask_pd

View File

@ -4395,6 +4395,7 @@ __m512d test_mm512_movedup_pd(__m512d __A) {
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm512_movedup_pd(__A);
}
// Constant-expression check for _mm512_movedup_pd: the expected result repeats
// each even-indexed input lane (0, 2, 4, 6) twice.
// NOTE(review): TEST_CONSTEXPR and match_m512d are helpers defined outside this
// view - presumably they assert the lane-wise comparison at compile time.
TEST_CONSTEXPR(match_m512d(_mm512_movedup_pd((__m512d){-1.0, +2.0, +3.0, +4.0, -5.0, -6.0, +7.0, +8.0}), -1.0, -1.0, +3.0, +3.0, -5.0, -5.0, +7.0, +7.0));
__m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_movedup_pd
@ -8691,6 +8692,7 @@ __m512 test_mm512_movehdup_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
return _mm512_movehdup_ps(__A);
}
// Constant-expression check for _mm512_movehdup_ps: the input alternates
// +k / -k, so duplicating the odd-indexed lanes yields only the negative values.
// NOTE(review): TEST_CONSTEXPR and match_m512 are helpers defined outside this
// view - presumably they assert the lane-wise comparison at compile time.
TEST_CONSTEXPR(match_m512(_mm512_movehdup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), -1.0f, -1.0f, -2.0f, -2.0f, -3.0f, -3.0f, -4.0f, -4.0f, -5.0f, -5.0f, -6.0f, -6.0f, -7.0f, -7.0f, -8.0f, -8.0f));
__m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_movehdup_ps
@ -8711,6 +8713,7 @@ __m512 test_mm512_moveldup_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
return _mm512_moveldup_ps(__A);
}
// Constant-expression check for _mm512_moveldup_ps: the input alternates
// +k / -k, so duplicating the even-indexed lanes yields only the positive values.
// NOTE(review): TEST_CONSTEXPR and match_m512 are helpers defined outside this
// view - presumably they assert the lane-wise comparison at compile time.
TEST_CONSTEXPR(match_m512(_mm512_moveldup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +3.0f, +3.0f, +4.0f, +4.0f, +5.0f, +5.0f, +6.0f, +6.0f, +7.0f, +7.0f, +8.0f, +8.0f));
__m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_moveldup_ps