[X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)
This is a follow-up to D92940. We have successfully converted the fadd/fmul _mm_reduce_* intrinsics to llvm.reduction intrinsics plus the reassoc flag. We can take the same approach for fmin/fmax, i.e. llvm.reduction intrinsics plus the nnan flag.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D93179
parent aef781b47a
commit 61da20575d
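For illustration, a minimal usage sketch (hypothetical file name and values, not part of the patch; build with -mavx512f): after this change, _mm512_reduce_max_pd expands to __builtin_ia32_reduce_fmax_pd512 and lowers to a single llvm.vector.reduce.fmax call carrying the nnan flag, instead of the old log2(n) ladder of shuffles and max instructions.

// reduce_demo.c -- hypothetical example; build: clang -O2 -mavx512f reduce_demo.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d v = _mm512_setr_pd(1.0, 7.0, -3.0, 4.0, 2.5, 6.0, 0.0, 5.0);
  // Now emits: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> ...)
  double m = _mm512_reduce_max_pd(v);
  printf("max = %f\n", m); // prints "max = 7.000000" (no NaN elements here)
  return 0;
}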
@@ -1878,6 +1878,10 @@ TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")
@@ -13851,16 +13851,30 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_reduce_fadd_ps512: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
-    Builder.getFastMathFlags().setAllowReassoc(true);
+    Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
   case X86::BI__builtin_ia32_reduce_fmul_pd512:
   case X86::BI__builtin_ia32_reduce_fmul_ps512: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
-    Builder.getFastMathFlags().setAllowReassoc(true);
+    Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
+  case X86::BI__builtin_ia32_reduce_fmax_pd512:
+  case X86::BI__builtin_ia32_reduce_fmax_ps512: {
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
+    Builder.getFastMathFlags().setNoNaNs();
+    return Builder.CreateCall(F, {Ops[0]});
+  }
+  case X86::BI__builtin_ia32_reduce_fmin_pd512:
+  case X86::BI__builtin_ia32_reduce_fmin_ps512: {
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
+    Builder.getFastMathFlags().setNoNaNs();
+    return Builder.CreateCall(F, {Ops[0]});
+  }
   case X86::BI__builtin_ia32_reduce_mul_d512:
   case X86::BI__builtin_ia32_reduce_mul_q512: {
     Function *F =
@@ -9300,8 +9300,11 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
  * computations. In vector-reduction arithmetic, the evaluation order is
  * independent of the order of the input elements of V.
 
- * For floating point types, we always assume the elements are reassociable even
- * if -fast-math is off.
+ * For floating-point intrinsics:
+ * 1. When using fadd/fmul intrinsics, the order of operations within the
+ * vector is unspecified (associative math).
+ * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
+ * produce unspecified results.
 
  * Used bisection method. At each step, we partition the vector with previous
  * step in half, and the operation is performed on its two halves.
@@ -9524,75 +9527,49 @@ _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
   return __builtin_ia32_reduce_umin_d512((__v16si)__V);
 }
 
-#define _mm512_mask_reduce_operator(op) \
-  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
-  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
-  __m256d __t3 = _mm256_##op(__t1, __t2); \
-  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
-  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
-  __m128d __t6 = _mm_##op(__t4, __t5); \
-  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
-  __m128d __t8 = _mm_##op(__t6, __t7); \
-  return __t8[0]
-
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_pd(__m512d __V) {
-  _mm512_mask_reduce_operator(max_pd);
+  return __builtin_ia32_reduce_fmax_pd512(__V);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_pd(__m512d __V) {
-  _mm512_mask_reduce_operator(min_pd);
+  return __builtin_ia32_reduce_fmin_pd512(__V);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
   __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
-  _mm512_mask_reduce_operator(max_pd);
+  return __builtin_ia32_reduce_fmax_pd512(__V);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
   __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
-  _mm512_mask_reduce_operator(min_pd);
+  return __builtin_ia32_reduce_fmin_pd512(__V);
 }
-#undef _mm512_mask_reduce_operator
-
-#define _mm512_mask_reduce_operator(op) \
-  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
-  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
-  __m256 __t3 = _mm256_##op(__t1, __t2); \
-  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
-  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
-  __m128 __t6 = _mm_##op(__t4, __t5); \
-  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
-  __m128 __t8 = _mm_##op(__t6, __t7); \
-  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
-  __m128 __t10 = _mm_##op(__t8, __t9); \
-  return __t10[0]
 
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_ps(__m512 __V) {
-  _mm512_mask_reduce_operator(max_ps);
+  return __builtin_ia32_reduce_fmax_ps512(__V);
 }
 
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_ps(__m512 __V) {
-  _mm512_mask_reduce_operator(min_ps);
+  return __builtin_ia32_reduce_fmin_ps512(__V);
 }
 
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
   __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
-  _mm512_mask_reduce_operator(max_ps);
+  return __builtin_ia32_reduce_fmax_ps512(__V);
 }
 
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
   __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
-  _mm512_mask_reduce_operator(min_ps);
+  return __builtin_ia32_reduce_fmin_ps512(__V);
 }
-#undef _mm512_mask_reduce_operator
 
 /// Moves the least significant 32 bits of a vector of [16 x i32] to a
 /// 32-bit signed integer value.
@@ -14,18 +14,12 @@ unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
   return _mm512_reduce_max_epu64(__W);
 }
 
-double test_mm512_reduce_max_pd(__m512d __W){
+double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
 // CHECK-LABEL: @test_mm512_reduce_max_pd(
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
-// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
-// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  return _mm512_reduce_max_pd(__W);
+// CHECK-NOT: nnan
+// CHECK: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+// CHECK-NOT: nnan
+  return _mm512_reduce_max_pd(__W) + ExtraAddOp;
 }
 
 long long test_mm512_reduce_min_epi64(__m512i __W){
@@ -40,18 +34,12 @@ unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
   return _mm512_reduce_min_epu64(__W);
 }
 
-double test_mm512_reduce_min_pd(__m512d __W){
+double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
 // CHECK-LABEL: @test_mm512_reduce_min_pd(
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
-// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
-// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  return _mm512_reduce_min_pd(__W);
+// CHECK-NOT: nnan
+// CHECK: call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+// CHECK-NOT: nnan
+  return _mm512_reduce_min_pd(__W) * ExtraMulOp;
 }
 
 long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
@@ -59,7 +47,7 @@ long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
 // CHECK: bitcast i8 %{{.*}} to <8 x i1>
 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
   return _mm512_mask_reduce_max_epi64(__M, __W);
 }
 
 unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
@@ -67,23 +55,15 @@ unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
 // CHECK: bitcast i8 %{{.*}} to <8 x i1>
 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
   return _mm512_mask_reduce_max_epu64(__M, __W);
 }
 
 double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_max_pd(
 // CHECK: bitcast i8 %{{.*}} to <8 x i1>
 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
-// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
-// CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  return _mm512_mask_reduce_max_pd(__M, __W);
+// CHECK: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+  return _mm512_mask_reduce_max_pd(__M, __W);
 }
 
 long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
@@ -91,7 +71,7 @@ long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
 // CHECK: bitcast i8 %{{.*}} to <8 x i1>
 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
   return _mm512_mask_reduce_min_epi64(__M, __W);
 }
 
 unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
@@ -99,23 +79,15 @@ unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
 // CHECK: bitcast i8 %{{.*}} to <8 x i1>
 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
   return _mm512_mask_reduce_min_epu64(__M, __W);
 }
 
 double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_min_pd(
 // CHECK: bitcast i8 %{{.*}} to <8 x i1>
 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
-// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
-// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
-// CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
-// CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  return _mm512_mask_reduce_min_pd(__M, __W);
+// CHECK: call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+  return _mm512_mask_reduce_min_pd(__M, __W);
 }
 
 int test_mm512_reduce_max_epi32(__m512i __W){
@@ -131,19 +103,9 @@ unsigned int test_mm512_reduce_max_epu32(__m512i __W){
 }
 
 float test_mm512_reduce_max_ps(__m512 __W){
-// CHECK-LABEL: define{{.*}} float @test_mm512_reduce_max_ps(
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  return _mm512_reduce_max_ps(__W);
+// CHECK-LABEL: @test_mm512_reduce_max_ps(
+// CHECK: call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+  return _mm512_reduce_max_ps(__W);
 }
 
 int test_mm512_reduce_min_epi32(__m512i __W){
@@ -159,19 +121,9 @@ unsigned int test_mm512_reduce_min_epu32(__m512i __W){
 }
 
 float test_mm512_reduce_min_ps(__m512 __W){
-// CHECK-LABEL: define{{.*}} float @test_mm512_reduce_min_ps(
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  return _mm512_reduce_min_ps(__W);
+// CHECK-LABEL: @test_mm512_reduce_min_ps(
+// CHECK: call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+  return _mm512_reduce_min_ps(__W);
 }
 
 int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
@@ -179,7 +131,7 @@ int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
 // CHECK: bitcast i16 %{{.*}} to <16 x i1>
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
   return _mm512_mask_reduce_max_epi32(__M, __W);
 }
 
 unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
@@ -187,25 +139,15 @@ unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
 // CHECK: bitcast i16 %{{.*}} to <16 x i1>
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
   return _mm512_mask_reduce_max_epu32(__M, __W);
 }
 
 float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
-// CHECK-LABEL: define{{.*}} float @test_mm512_mask_reduce_max_ps(
+// CHECK-LABEL: @test_mm512_mask_reduce_max_ps(
 // CHECK: bitcast i16 %{{.*}} to <16 x i1>
 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  return _mm512_mask_reduce_max_ps(__M, __W);
+// CHECK: call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+  return _mm512_mask_reduce_max_ps(__M, __W);
 }
 
 int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
@@ -213,7 +155,7 @@ int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
 // CHECK: bitcast i16 %{{.*}} to <16 x i1>
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
   return _mm512_mask_reduce_min_epi32(__M, __W);
 }
 
 unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
@@ -221,24 +163,14 @@ unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
 // CHECK: bitcast i16 %{{.*}} to <16 x i1>
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
   return _mm512_mask_reduce_min_epu32(__M, __W);
 }
 
 float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
-// CHECK-LABEL: define{{.*}} float @test_mm512_mask_reduce_min_ps(
+// CHECK-LABEL: @test_mm512_mask_reduce_min_ps(
 // CHECK: bitcast i16 %{{.*}} to <16 x i1>
 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-// CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  return _mm512_mask_reduce_min_ps(__M, __W);
+// CHECK: call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+  return _mm512_mask_reduce_min_ps(__M, __W);
 }
 
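To make the caveat in the updated avx512fintrin.h comment concrete, here is a hedged sketch (the function name and values are invented for illustration): because the builtins are emitted with the nnan flag, the optimizer may assume no element is NaN, so an input containing NaN yields an unspecified result; callers that need defined NaN behavior must handle it themselves.

#include <immintrin.h>
#include <math.h>

// Hypothetical example: the result below is unspecified. It is NOT
// guaranteed to be either NAN or 7.0, per the nnan assumption.
double max_with_nan_is_unspecified(void) {
  __m512d v = _mm512_setr_pd(1.0, NAN, -3.0, 4.0, 2.5, 7.0, 0.0, 5.0);
  return _mm512_reduce_max_pd(v);
}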