[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx (#84136)

This patch relands #67410 and fixes the cmpfail below:
#include <immintrin.h>
__attribute__((target("avx"))) void test(__m128 a, __m128 b) {
  _mm_cmp_ps(a, b, 14);
}

According to Intel SDM, SSE/SSE2 instructions cmp[p|s][s|d] are
supported when imm8 is in range of [0, 7]
This commit is contained in:
Freddy Ye 2024-03-09 13:49:15 +08:00 committed by GitHub
parent c58c8278f9
commit fc0fc768cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 316 additions and 121 deletions

View File

@ -226,6 +226,8 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2")
@ -243,6 +245,8 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
@ -462,12 +466,8 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")

View File

@ -31,6 +31,7 @@
#include "clang/AST/StmtObjC.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
@ -2613,6 +2614,24 @@ void CGBuilderInserter::InsertHelper(
// called function.
void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
const FunctionDecl *TargetDecl) {
// SemaChecking cannot handle below x86 builtins because they have different
// parameter ranges with different TargetAttribute of caller.
if (CGM.getContext().getTargetInfo().getTriple().isX86()) {
unsigned BuiltinID = TargetDecl->getBuiltinID();
if (BuiltinID == X86::BI__builtin_ia32_cmpps ||
BuiltinID == X86::BI__builtin_ia32_cmpss ||
BuiltinID == X86::BI__builtin_ia32_cmppd ||
BuiltinID == X86::BI__builtin_ia32_cmpsd) {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
llvm::StringMap<bool> TargetFetureMap;
CGM.getContext().getFunctionFeatureMap(TargetFetureMap, FD);
llvm::APSInt Result =
*(E->getArg(2)->getIntegerConstantExpr(CGM.getContext()));
if (Result.getSExtValue() > 7 && !TargetFetureMap.lookup("avx"))
CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature)
<< TargetDecl->getDeclName() << "avx";
}
}
return checkTargetFeatures(E->getBeginLoc(), TargetDecl);
}

View File

@ -1574,14 +1574,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
(__v4df)(__m256d)(b), (int)(mask)))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
@ -1607,6 +1599,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
/* Below intrinsic defined in emmintrin.h can be used for AVX */
/// Compares each of the corresponding double-precision values of two
/// 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
@ -1663,10 +1656,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) \
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)))
/// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c)
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
/// operand.
@ -1723,9 +1715,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)))
/// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c)
/// Compares each of the corresponding double-precision values of two
/// 256-bit vectors of [4 x double], using the operation specified by the
@ -1847,6 +1837,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)))
/* Below intrinsic defined in emmintrin.h can be used for AVX */
/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
@ -1902,10 +1893,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) \
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)))
/// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c)
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
/// integer operand.
@ -1961,9 +1951,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)))
/// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c)
/// Takes a [8 x i32] vector and returns the vector element value
/// indexed by the immediate constant operand.

View File

@ -4745,6 +4745,74 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) {
return (__m128d)__a;
}
/// Compares each of the corresponding double-precision values of two
/// 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction.
///
/// \param a
/// A 128-bit vector of [2 x double].
/// \param b
/// A 128-bit vector of [2 x double].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) \
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
(c)))
/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction.
///
/// \param a
/// A 128-bit vector of [2 x double].
/// \param b
/// A 128-bit vector of [2 x double].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) \
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
(c)))
#if defined(__cplusplus)
extern "C" {
#endif

View File

@ -2940,6 +2940,81 @@ _mm_movemask_ps(__m128 __a)
return __builtin_ia32_movmskps((__v4sf)__a);
}
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
/// operand.
///
/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPPS </c> instruction.
///
/// \param a
/// A 128-bit vector of [4 x float].
/// \param b
/// A 128-bit vector of [4 x float].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
/// integer operand.
///
/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPSS </c> instruction.
///
/// \param a
/// A 128-bit vector of [4 x float].
/// \param b
/// A 128-bit vector of [4 x float].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
#define _MM_ALIGN16 __attribute__((aligned(16)))

View File

@ -0,0 +1,11 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown-emit-llvm -o /dev/null -verify
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown-emit-llvm -o /dev/null -verify
// expected-no-diagnostics
#include <immintrin.h>
__attribute__((target("avx")))
__m128 test(__m128 a, __m128 b) {
return _mm_cmp_ps(a, b, 14);
}

View File

@ -596,54 +596,6 @@ __m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) {
return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
}
__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_oq
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
}
__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_os
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OS);
}
__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_os
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OS);
}
__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_q
// CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
}
__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_uq
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
}
__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_us
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_US);
}
__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nle_us
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_US);
}
__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ord_q
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_Q);
}
__m128d test_mm_cmp_pd_eq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_uq
// CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}}
@ -788,54 +740,6 @@ __m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) {
return _mm_cmp_pd(a, b, _CMP_TRUE_US);
}
__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}
__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_lt_os
// CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LT_OS);
}
__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_le_os
// CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LE_OS);
}
__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_unord_q
// CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}
__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_uq
// CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}
__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nlt_us
// CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLT_US);
}
__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nle_us
// CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLE_US);
}
__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ord_q
// CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}
__m128 test_mm_cmp_ps_eq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_uq
// CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}}

View File

@ -0,0 +1,22 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
// RUN: -target-feature +avx -emit-llvm -fsyntax-only -verify
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown \
// RUN: -target-feature +avx -emit-llvm -fsyntax-only -verify
#include <immintrin.h>
__m128d test_mm_cmp_pd(__m128d a, __m128d b) {
return _mm_cmp_pd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}
__m128d test_mm_cmp_sd(__m128d a, __m128d b) {
return _mm_cmp_sd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}
__m128 test_mm_cmp_ps(__m128 a, __m128 b) {
return _mm_cmp_ps(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}
__m128 test_mm_cmp_ss(__m128 a, __m128 b) {
return _mm_cmp_ss(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}

View File

@ -34,6 +34,60 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
return _mm_andnot_ps(A, B);
}
__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}
__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_lt_os
// CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LT_OS);
}
__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_le_os
// CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LE_OS);
}
__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_unord_q
// CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}
__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_uq
// CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}
__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nlt_us
// CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLT_US);
}
__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nle_us
// CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLE_US);
}
__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ord_q
// CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}
__m128 test_mm_cmp_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_cmp_ss
// CHECK: call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
return _mm_cmp_ss(A, B, _CMP_ORD_Q);
}
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpeq_ps
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>

View File

@ -160,6 +160,60 @@ void test_mm_clflush(void* A) {
_mm_clflush(A);
}
__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_oq
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
}
__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_os
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OS);
}
__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_os
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OS);
}
__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_q
// CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
}
__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_uq
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
}
__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_us
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_US);
}
__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nle_us
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_US);
}
__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ord_q
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_Q);
}
__m128d test_mm_cmp_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_sd
// CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
return _mm_cmp_sd(A, B, _CMP_ORD_Q);
}
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpeq_epi8
// CHECK: icmp eq <16 x i8>

View File

@ -15,7 +15,7 @@ int baz(__m256i a) {
#if NEED_AVX_2
__m128 need_avx(__m128 a, __m128 b) {
return _mm_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}}
return _mm_cmp_ps(a, b, 8); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}}
}
#endif