[Headers][X86] Update SSE/AVX and/andnot/or/xor intrinsics to be used in constexpr (#152305)

This commit is contained in:
Simon Pilgrim 2025-08-07 08:16:26 +01:00 committed by GitHub
parent f44d8d583c
commit b83f7f195c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 95 additions and 40 deletions

View File

@ -31,6 +31,14 @@
__min_vector_width__(128)))
#endif
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
/* SSE4 Multiple Packed Sums of Absolute Difference. */
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
/// four unsigned 8-bit integers from the 256-bit integer vectors \a X and
@ -460,7 +468,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_and_si256(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a & (__v4du)__b);
@ -478,7 +486,7 @@ _mm256_and_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_andnot_si256(__m256i __a, __m256i __b)
{
return (__m256i)(~(__v4du)__a & (__v4du)__b);
@ -1822,7 +1830,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_or_si256(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a | (__v4du)__b);
@ -2974,7 +2982,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_xor_si256(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a ^ (__v4du)__b);
@ -5289,5 +5297,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#endif /* __AVX2INTRIN_H */

View File

@ -20,6 +20,14 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512dq,no-evex512")))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#else
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif
static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
{
@ -167,7 +175,7 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A ^ (__v8du)__B);
}
@ -186,7 +194,7 @@ _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_ps (__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A ^ (__v16su)__B);
}
@ -205,7 +213,7 @@ _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A | (__v8du)__B);
}
@ -224,7 +232,7 @@ _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_ps(__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A | (__v16su)__B);
}
@ -243,7 +251,7 @@ _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A & (__v8du)__B);
}
@ -262,7 +270,7 @@ _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_ps(__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A & (__v16su)__B);
}
@ -281,7 +289,7 @@ _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_pd(__m512d __A, __m512d __B) {
return (__m512d)(~(__v8du)__A & (__v8du)__B);
}
@ -300,7 +308,7 @@ _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_ps(__m512 __A, __m512 __B) {
return (__m512)(~(__v16su)__A & (__v16su)__B);
}
@ -1375,5 +1383,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif

View File

@ -645,7 +645,7 @@ _mm512_zextsi256_si512(__m256i __a)
}
/* Bitwise operators */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi32(__m512i __a, __m512i __b)
{
return (__m512i)((__v16su)__a & (__v16su)__b);
@ -666,7 +666,7 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
__k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi64(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a & (__v8du)__b);
@ -687,13 +687,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
__k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
return (__m512i)(~(__v8du)__A & (__v8du)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
return (__m512i)(~(__v16su)__A & (__v16su)__B);
@ -714,7 +714,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
__U, __A, __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
return (__m512i)(~(__v8du)__A & (__v8du)__B);
@ -735,7 +735,7 @@ _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
__U, __A, __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi32(__m512i __a, __m512i __b)
{
return (__m512i)((__v16su)__a | (__v16su)__b);
@ -755,7 +755,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi64(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a | (__v8du)__b);
@ -775,7 +775,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
return (__m512i)((__v16su)__a ^ (__v16su)__b);
@ -795,7 +795,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a ^ (__v8du)__b);
@ -815,19 +815,19 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_si512(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a & (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_si512(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a | (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_si512(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a ^ (__v8du)__b);

View File

@ -555,7 +555,7 @@ _mm256_rcp_ps(__m256 __a)
/// A 256-bit vector of [4 x double] containing one of the source operands.
/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
/// values between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_and_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4du)__a & (__v4du)__b);
@ -573,7 +573,7 @@ _mm256_and_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing one of the source operands.
/// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
/// values between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_and_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a & (__v8su)__b);
@ -594,7 +594,7 @@ _mm256_and_ps(__m256 __a, __m256 __b)
/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
/// values of the second operand and the one's complement of the first
/// operand.
static __inline __m256d __DEFAULT_FN_ATTRS
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_andnot_pd(__m256d __a, __m256d __b)
{
return (__m256d)(~(__v4du)__a & (__v4du)__b);
@ -615,7 +615,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b)
/// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
/// values of the second operand and the one's complement of the first
/// operand.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_andnot_ps(__m256 __a, __m256 __b)
{
return (__m256)(~(__v8su)__a & (__v8su)__b);
@ -633,7 +633,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b)
/// A 256-bit vector of [4 x double] containing one of the source operands.
/// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
/// values between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_or_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4du)__a | (__v4du)__b);
@ -651,7 +651,7 @@ _mm256_or_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing one of the source operands.
/// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
/// values between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_or_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a | (__v8su)__b);
@ -669,7 +669,7 @@ _mm256_or_ps(__m256 __a, __m256 __b)
/// A 256-bit vector of [4 x double] containing one of the source operands.
/// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
/// values between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_xor_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4du)__a ^ (__v4du)__b);
@ -687,7 +687,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing one of the source operands.
/// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
/// values between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_xor_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a ^ (__v8su)__b);

View File

@ -2678,8 +2678,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a,
/// A 128-bit integer vector containing one of the source operands.
/// \returns A 128-bit integer vector containing the bitwise AND of the values
/// in both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_and_si128(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a & (__v2du)__b);
}
@ -2697,8 +2697,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
/// A 128-bit vector containing the right source operand.
/// \returns A 128-bit integer vector containing the bitwise AND of the one's
/// complement of the first operand and the values in the second operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_andnot_si128(__m128i __a, __m128i __b) {
return (__m128i)(~(__v2du)__a & (__v2du)__b);
}
/// Performs a bitwise OR of two 128-bit integer vectors.
@ -2713,8 +2713,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
/// A 128-bit integer vector containing one of the source operands.
/// \returns A 128-bit integer vector containing the bitwise OR of the values
/// in both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_or_si128(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a | (__v2du)__b);
}
@ -2730,8 +2730,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
/// A 128-bit integer vector containing one of the source operands.
/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
/// values in both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_xor_si128(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a ^ (__v2du)__b);
}

View File

@ -44,12 +44,14 @@ __m256d test_mm256_and_pd(__m256d A, __m256d B) {
// CHECK: and <4 x i64>
return _mm256_and_pd(A, B);
}
TEST_CONSTEXPR(match_m256d(_mm256_and_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -0.0, -0.0, +0.0, +7.0));
__m256 test_mm256_and_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_and_ps
// CHECK: and <8 x i32>
return _mm256_and_ps(A, B);
}
TEST_CONSTEXPR(match_m256(_mm256_and_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f));
__m256d test_mm256_andnot_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_andnot_pd
@ -57,6 +59,7 @@ __m256d test_mm256_andnot_pd(__m256d A, __m256d B) {
// CHECK: and <4 x i64>
return _mm256_andnot_pd(A, B);
}
TEST_CONSTEXPR(match_m256d(_mm256_andnot_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), +0.0, +0.0, +0.0, +0.0));
__m256 test_mm256_andnot_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_andnot_ps
@ -64,6 +67,7 @@ __m256 test_mm256_andnot_ps(__m256 A, __m256 B) {
// CHECK: and <8 x i32>
return _mm256_andnot_ps(A, B);
}
TEST_CONSTEXPR(match_m256(_mm256_andnot_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_blend_pd
@ -1300,12 +1304,14 @@ __m256d test_mm256_or_pd(__m256d A, __m256d B) {
// CHECK: or <4 x i64>
return _mm256_or_pd(A, B);
}
TEST_CONSTEXPR(match_m256d(_mm256_or_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -4.0, -5.0, -6.0, +7.0));
__m256 test_mm256_or_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_or_ps
// CHECK: or <8 x i32>
return _mm256_or_ps(A, B);
}
TEST_CONSTEXPR(match_m256(_mm256_or_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f));
__m128d test_mm_permute_pd(__m128d A) {
// CHECK-LABEL: test_mm_permute_pd
@ -2077,12 +2083,14 @@ __m256d test_mm256_xor_pd(__m256d A, __m256d B) {
// CHECK: xor <4 x i64>
return _mm256_xor_pd(A, B);
}
TEST_CONSTEXPR(match_m256d(_mm256_xor_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -4.0, +5.0, -6.0, +0.0));
__m256 test_mm256_xor_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_xor_ps
// CHECK: xor <8 x i32>
return _mm256_xor_ps(A, B);
}
TEST_CONSTEXPR(match_m256(_mm256_xor_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f));
void test_mm256_zeroall(void) {
// CHECK-LABEL: test_mm256_zeroall

View File

@ -9,6 +9,7 @@
#include <immintrin.h>
#include "builtin_test_helpers.h"
// NOTE: This should match the tests in llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@ -97,6 +98,7 @@ __m256i test_mm256_and_si256(__m256i a, __m256i b) {
// CHECK: and <4 x i64>
return _mm256_and_si256(a, b);
}
TEST_CONSTEXPR(match_v4di(_mm256_and_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, 0, 0, -1));
__m256i test_mm256_andnot_si256(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_andnot_si256
@ -104,6 +106,7 @@ __m256i test_mm256_andnot_si256(__m256i a, __m256i b) {
// CHECK: and <4 x i64>
return _mm256_andnot_si256(a, b);
}
TEST_CONSTEXPR(match_v4di(_mm256_andnot_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, 0, -1, 0));
__m256i test_mm256_avg_epu8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_avg_epu8
@ -920,6 +923,7 @@ __m256i test_mm256_or_si256(__m256i a, __m256i b) {
// CHECK: or <4 x i64>
return _mm256_or_si256(a, b);
}
TEST_CONSTEXPR(match_v4di(_mm256_or_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, -1, -1, -1));
__m256i test_mm256_packs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_packs_epi16
@ -1336,3 +1340,4 @@ __m256i test_mm256_xor_si256(__m256i a, __m256i b) {
// CHECK: xor <4 x i64>
return _mm256_xor_si256(a, b);
}
TEST_CONSTEXPR(match_v4di(_mm256_xor_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, -1, -1, 0));

View File

@ -2,6 +2,7 @@
#include <immintrin.h>
#include "builtin_test_helpers.h"
__mmask8 test_knot_mask8(__mmask8 a) {
// CHECK-LABEL: @test_knot_mask8
@ -267,6 +268,7 @@ __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B);
}
TEST_CONSTEXPR(match_m512d(_mm512_xor_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, +5.0, -6.0, +0.0, +0.0, -6.0, +5.0, -4.0));
__m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_xor_pd
@ -281,6 +283,7 @@ __m512 test_mm512_xor_ps (__m512 __A, __m512 __B) {
// CHECK: xor <16 x i32>
return (__m512) _mm512_xor_ps(__A, __B);
}
TEST_CONSTEXPR(match_m512(_mm512_xor_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f, -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f));
__m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_xor_ps
@ -303,6 +306,7 @@ __m512d test_mm512_or_pd (__m512d __A, __m512d __B) {
// CHECK: or <8 x i64>
return (__m512d) _mm512_or_pd(__A, __B);
}
TEST_CONSTEXPR(match_m512d(_mm512_or_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, -5.0, -6.0, +7.0, +7.0, -6.0, -5.0, -4.0));
__m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_or_pd
@ -325,6 +329,7 @@ __m512 test_mm512_or_ps (__m512 __A, __m512 __B) {
// CHECK: or <16 x i32>
return (__m512) _mm512_or_ps(__A, __B);
}
TEST_CONSTEXPR(match_m512(_mm512_or_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f, -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f));
__m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_or_ps
@ -347,6 +352,7 @@ __m512d test_mm512_and_pd (__m512d __A, __m512d __B) {
// CHECK: and <8 x i64>
return (__m512d) _mm512_and_pd(__A, __B);
}
TEST_CONSTEXPR(match_m512d(_mm512_and_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -0.0, -0.0, +0.0, +7.0, +7.0, +0.0, -0.0, -0.0));
__m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_and_pd
@ -369,6 +375,7 @@ __m512 test_mm512_and_ps (__m512 __A, __m512 __B) {
// CHECK: and <16 x i32>
return (__m512) _mm512_and_ps(__A, __B);
}
TEST_CONSTEXPR(match_m512(_mm512_and_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f));
__m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_and_ps
@ -392,6 +399,7 @@ __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) {
// CHECK: and <8 x i64>
return (__m512d) _mm512_andnot_pd(__A, __B);
}
TEST_CONSTEXPR(match_m512d(_mm512_andnot_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0));
__m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_andnot_pd
@ -415,6 +423,7 @@ __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) {
// CHECK: and <16 x i32>
return (__m512) _mm512_andnot_ps(__A, __B);
}
TEST_CONSTEXPR(match_m512(_mm512_andnot_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
__m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_andnot_ps

View File

@ -2811,36 +2811,42 @@ __m512i test_mm512_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i _
// CHECK: and <16 x i32>
return _mm512_and_epi32(__a, __b);
}
TEST_CONSTEXPR(match_v16si(_mm512_and_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1));
__m512i test_mm512_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_and_epi64
// CHECK: and <8 x i64>
return _mm512_and_epi64(__a, __b);
}
TEST_CONSTEXPR(match_v8di(_mm512_and_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, 0, -1, 0, 0, 0, -1));
__m512i test_mm512_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_or_epi32
// CHECK: or <16 x i32>
return _mm512_or_epi32(__a, __b);
}
TEST_CONSTEXPR(match_v16si(_mm512_or_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1));
__m512i test_mm512_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_or_epi64
// CHECK: or <8 x i64>
return _mm512_or_epi64(__a, __b);
}
TEST_CONSTEXPR(match_v8di(_mm512_or_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, -1, 0, -1, -1, -1));
__m512i test_mm512_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_xor_epi32
// CHECK: xor <16 x i32>
return _mm512_xor_epi32(__a, __b);
}
TEST_CONSTEXPR(match_v16si(_mm512_xor_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0));
__m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_xor_epi64
// CHECK: xor <8 x i64>
return _mm512_xor_epi64(__a, __b);
}
TEST_CONSTEXPR(match_v8di(_mm512_xor_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, 0, 0, -1, -1, 0));
__m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){
// CHECK-LABEL: test_mm512_maskz_andnot_epi32
@ -2869,6 +2875,7 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B)
return _mm512_andnot_si512(__A, __B);
}
TEST_CONSTEXPR(match_v8di(_mm512_andnot_si512((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0));
__m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_andnot_epi32
@ -2876,6 +2883,7 @@ __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) {
// CHECK: and <16 x i32> %{{.*}}, %{{.*}}
return _mm512_andnot_epi32(__A,__B);
}
TEST_CONSTEXPR(match_v16si(_mm512_andnot_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0));
__m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_andnot_epi64
@ -2900,6 +2908,7 @@ __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) {
// CHECK: and <8 x i64> %{{.*}}, %{{.*}}
return _mm512_andnot_epi64(__A,__B);
}
TEST_CONSTEXPR(match_v8di(_mm512_andnot_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0));
__m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_sub_epi32

View File

@ -96,6 +96,7 @@ __m128i test_mm_and_si128(__m128i A, __m128i B) {
// CHECK: and <2 x i64>
return _mm_and_si128(A, B);
}
TEST_CONSTEXPR(match_v4si(_mm_and_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, 0, 0, -1));
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_andnot_pd
@ -111,6 +112,7 @@ __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
// CHECK: and <2 x i64>
return _mm_andnot_si128(A, B);
}
TEST_CONSTEXPR(match_v4si(_mm_andnot_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, 0, -1, 0));
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_avg_epu8
@ -963,6 +965,7 @@ __m128i test_mm_or_si128(__m128i A, __m128i B) {
// CHECK: or <2 x i64> %{{.*}}, %{{.*}}
return _mm_or_si128(A, B);
}
TEST_CONSTEXPR(match_v4si(_mm_or_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, -1, -1, -1));
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_packs_epi16
@ -1831,3 +1834,4 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) {
// CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
return _mm_xor_si128(A, B);
}
TEST_CONSTEXPR(match_v4si(_mm_xor_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, -1, -1, 0));