[HLSL] Implement elementwise popcount (#108121)
Add a new elementwise popcount builtin to support the HLSL function 'countbits'. Elementwise popcount only accepts integer types. Add the HLSL intrinsic 'countbits'. Closes #99094.
This commit is contained in:
parent
b334ca6739
commit
67518a44fe
@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
|
||||
T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types
|
||||
T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types
|
||||
T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types
|
||||
T __builtin_elementwise_popcount(T x) return the number of 1 bits in x integer types
|
||||
T __builtin_elementwise_pow(T x, T y) return x raised to the power of y floating point types
|
||||
T __builtin_elementwise_bitreverse(T x) return the integer represented after reversing the bits of x integer types
|
||||
T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types
|
||||
|
@ -116,6 +116,7 @@ C++ Language Changes
|
||||
|
||||
- Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.
|
||||
|
||||
- Add ``__builtin_elementwise_popcount`` builtin for integer types only.
|
||||
|
||||
C++2c Feature Support
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
@ -1322,6 +1322,12 @@ def ElementwiseLog10 : Builtin {
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def ElementwisePopcount : Builtin {
|
||||
let Spellings = ["__builtin_elementwise_popcount"];
|
||||
let Attributes = [NoThrow, Const, CustomTypeChecking];
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def ElementwisePow : Builtin {
|
||||
let Spellings = ["__builtin_elementwise_pow"];
|
||||
let Attributes = [NoThrow, Const, CustomTypeChecking];
|
||||
|
@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||
case Builtin::BI__builtin_elementwise_floor:
|
||||
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
|
||||
*this, E, llvm::Intrinsic::floor, "elt.floor"));
|
||||
case Builtin::BI__builtin_elementwise_popcount:
|
||||
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
|
||||
*this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
|
||||
case Builtin::BI__builtin_elementwise_roundeven:
|
||||
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
|
||||
*this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
|
||||
|
@ -650,6 +650,77 @@ float3 cosh(float3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh)
|
||||
float4 cosh(float4);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// count bits builtins
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// \fn T countbits(T Val)
|
||||
/// \brief Return the number of bits (per component) set in the input integer.
|
||||
/// \param Val The input value.
|
||||
|
||||
#ifdef __HLSL_ENABLE_16_BIT
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int16_t countbits(int16_t);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int16_t2 countbits(int16_t2);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int16_t3 countbits(int16_t3);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int16_t4 countbits(int16_t4);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint16_t countbits(uint16_t);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint16_t2 countbits(uint16_t2);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint16_t3 countbits(uint16_t3);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint16_t4 countbits(uint16_t4);
|
||||
#endif
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int countbits(int);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int2 countbits(int2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int3 countbits(int3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int4 countbits(int4);
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint countbits(uint);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint2 countbits(uint2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint3 countbits(uint3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint4 countbits(uint4);
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int64_t countbits(int64_t);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int64_t2 countbits(int64_t2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int64_t3 countbits(int64_t3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
int64_t4 countbits(int64_t4);
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint64_t countbits(uint64_t);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint64_t2 countbits(uint64_t2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint64_t3 countbits(uint64_t3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
|
||||
uint64_t4 countbits(uint64_t4);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// dot product builtins
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
|
||||
if (BuiltinElementwiseMath(TheCall))
|
||||
return ExprError();
|
||||
break;
|
||||
|
||||
case Builtin::BI__builtin_elementwise_popcount:
|
||||
case Builtin::BI__builtin_elementwise_bitreverse: {
|
||||
if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
|
||||
return ExprError();
|
||||
|
@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
|
||||
vf2 = __builtin_elementwise_log2(vf1);
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_popcount(si8 vi1, si8 vi2,
|
||||
long long int i1, long long int i2, short si,
|
||||
_BitInt(31) bi1, _BitInt(31) bi2) {
|
||||
|
||||
|
||||
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
|
||||
// CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
|
||||
i2 = __builtin_elementwise_popcount(i1);
|
||||
|
||||
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
|
||||
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
|
||||
vi2 = __builtin_elementwise_popcount(vi1);
|
||||
|
||||
// CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
|
||||
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
|
||||
const si8 cvi2 = vi2;
|
||||
vi2 = __builtin_elementwise_popcount(cvi2);
|
||||
|
||||
// CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
|
||||
// CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
|
||||
// CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
|
||||
bi2 = __builtin_elementwise_popcount(bi1);
|
||||
|
||||
// CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
|
||||
// CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
|
||||
b = __builtin_elementwise_popcount(int_as_one);
|
||||
|
||||
// CHECK: call i32 @llvm.ctpop.i32(i32 -10)
|
||||
b = __builtin_elementwise_popcount(-10);
|
||||
|
||||
// CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
|
||||
// CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
|
||||
// CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
|
||||
// CHECK-NEXT: = trunc i32 [[RES]] to i16
|
||||
si = __builtin_elementwise_popcount(si);
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
|
||||
float4 vf1, float4 vf2) {
|
||||
|
||||
|
80
clang/test/CodeGenHLSL/builtins/countbits.hlsl
Normal file
80
clang/test/CodeGenHLSL/builtins/countbits.hlsl
Normal file
@ -0,0 +1,80 @@
|
||||
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
|
||||
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
|
||||
// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
|
||||
|
||||
#ifdef __HLSL_ENABLE_16_BIT
|
||||
// CHECK-LABEL: test_countbits_ushort
|
||||
// CHECK: call i16 @llvm.ctpop.i16
|
||||
uint16_t test_countbits_ushort(uint16_t p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_ushort2
|
||||
// CHECK: call <2 x i16> @llvm.ctpop.v2i16
|
||||
uint16_t2 test_countbits_ushort2(uint16_t2 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_ushort3
|
||||
// CHECK: call <3 x i16> @llvm.ctpop.v3i16
|
||||
uint16_t3 test_countbits_ushort3(uint16_t3 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_ushort4
|
||||
// CHECK: call <4 x i16> @llvm.ctpop.v4i16
|
||||
uint16_t4 test_countbits_ushort4(uint16_t4 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Scalar uint case: countbits must lower to one i32 ctpop call. The return
|
||||
// type is uint, matching the argument type like the other cases in this file.
|
||||
// CHECK-LABEL: test_countbits_uint
|
||||
// CHECK: call i32 @llvm.ctpop.i32
|
||||
uint test_countbits_uint(uint p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_uint2
|
||||
// CHECK: call <2 x i32> @llvm.ctpop.v2i32
|
||||
uint2 test_countbits_uint2(uint2 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_uint3
|
||||
// CHECK: call <3 x i32> @llvm.ctpop.v3i32
|
||||
uint3 test_countbits_uint3(uint3 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_uint4
|
||||
// CHECK: call <4 x i32> @llvm.ctpop.v4i32
|
||||
uint4 test_countbits_uint4(uint4 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_countbits_long
|
||||
// CHECK: call i64 @llvm.ctpop.i64
|
||||
uint64_t test_countbits_long(uint64_t p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_long2
|
||||
// CHECK: call <2 x i64> @llvm.ctpop.v2i64
|
||||
uint64_t2 test_countbits_long2(uint64_t2 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_long3
|
||||
// CHECK: call <3 x i64> @llvm.ctpop.v3i64
|
||||
uint64_t3 test_countbits_long3(uint64_t3 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
||||
// CHECK-LABEL: test_countbits_long4
|
||||
// CHECK: call <4 x i64> @llvm.ctpop.v4i64
|
||||
uint64_t4 test_countbits_long4(uint64_t4 p0)
|
||||
{
|
||||
return countbits(p0);
|
||||
}
|
@ -505,6 +505,39 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv,
|
||||
// expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
|
||||
|
||||
struct Foo s = __builtin_elementwise_popcount(i);
|
||||
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
|
||||
|
||||
i = __builtin_elementwise_popcount();
|
||||
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
|
||||
|
||||
i = __builtin_elementwise_popcount(f);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
|
||||
|
||||
i = __builtin_elementwise_popcount(f, f);
|
||||
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
|
||||
|
||||
u = __builtin_elementwise_popcount(d);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
|
||||
|
||||
v = __builtin_elementwise_popcount(v);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
|
||||
|
||||
int2 i2 = __builtin_elementwise_popcount(iv);
|
||||
// expected-error@-1 {{initializing 'int2' (vector of 2 'int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
|
||||
|
||||
iv = __builtin_elementwise_popcount(i2);
|
||||
// expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'int2' (vector of 2 'int' values)}}
|
||||
|
||||
unsigned3 u3 = __builtin_elementwise_popcount(iv);
|
||||
// expected-error@-1 {{initializing 'unsigned3' (vector of 3 'unsigned int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
|
||||
|
||||
iv = __builtin_elementwise_popcount(u3);
|
||||
// expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'unsigned3' (vector of 3 'unsigned int' values)}}
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
|
||||
i = __builtin_elementwise_pow(p, d);
|
||||
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
|
||||
|
28
clang/test/Sema/countbits-errors.hlsl
Normal file
28
clang/test/Sema/countbits-errors.hlsl
Normal file
@ -0,0 +1,28 @@
|
||||
// RUN: %clang_cc1 -finclude-default-header \
|
||||
// RUN:   -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only \
|
||||
// RUN:   -disable-llvm-passes -verify
|
||||
|
||||
// Non-integer arguments must be rejected by countbits and by the builtin it
|
||||
// aliases, __builtin_elementwise_popcount.
|
||||
|
||||
double2 test_int_builtin(double2 p0) {
|
||||
return __builtin_elementwise_popcount(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
|
||||
}
|
||||
|
||||
float test_ambiguous(float p0) {
|
||||
return countbits(p0);
|
||||
// expected-error@-1 {{call to 'countbits' is ambiguous}}
|
||||
// The number of candidate notes depends on which overloads are enabled
|
||||
// (the 16-bit ones are conditional), so accept one or more.
|
||||
// expected-note@hlsl/hlsl_intrinsics.h:* 1+ {{candidate function}}
|
||||
}
|
||||
|
||||
float test_float_builtin(float p0) {
|
||||
return __builtin_elementwise_popcount(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
|
||||
}
|
@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() {
|
||||
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value);
|
||||
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value);
|
||||
}
|
||||
|
||||
void test_builtin_elementwise_popcount() {
|
||||
const int a = 2;
|
||||
int b = 1;
|
||||
static_assert(!is_const<decltype(__builtin_elementwise_popcount(a))>::value);
|
||||
static_assert(!is_const<decltype(__builtin_elementwise_popcount(b))>::value);
|
||||
}
|
||||
|
||||
|
21
clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
Normal file
21
clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
Normal file
@ -0,0 +1,21 @@
|
||||
// RUN: %clang_cc1 -finclude-default-header \
|
||||
// RUN:   -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only \
|
||||
// RUN:   -disable-llvm-passes -verify -verify-ignore-unexpected
|
||||
|
||||
// Floating-point arguments must be rejected by countbits and by
|
||||
// __builtin_elementwise_popcount. Each expected-error directive is kept on a
|
||||
// single line so that -verify can find its closing '}}'.
|
||||
|
||||
double test_int_builtin(double p0) {
|
||||
return countbits(p0);
|
||||
// expected-error@-1 {{call to 'countbits' is ambiguous}}
|
||||
}
|
||||
|
||||
double2 test_int_builtin_2(double2 p0) {
|
||||
return __builtin_elementwise_popcount(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
|
||||
}
|
||||
|
||||
double test_int_builtin_3(float p0) {
|
||||
return __builtin_elementwise_popcount(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
|
||||
}
|
@ -553,6 +553,17 @@ def Rbits : DXILOp<30, unary> {
|
||||
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
||||
}
|
||||
|
||||
def CBits : DXILOp<31, unary> {
|
||||
let Doc = "Returns the number of 1 bits in the specified value.";
|
||||
let LLVMIntrinsic = int_ctpop;
|
||||
let arguments = [OverloadTy];
|
||||
let result = OverloadTy;
|
||||
let overloads =
|
||||
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
|
||||
let stages = [Stages<DXIL1_0, [all_stages]>];
|
||||
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
||||
}
|
||||
|
||||
def FMax : DXILOp<35, binary> {
|
||||
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
|
||||
let LLVMIntrinsic = int_maxnum;
|
||||
|
47
llvm/test/CodeGen/DirectX/countbits.ll
Normal file
47
llvm/test/CodeGen/DirectX/countbits.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||
|
||||
; Make sure dxil operation function calls for countbits are generated for all integer types.
|
||||
|
||||
define noundef i16 @test_countbits_short(i16 noundef %a) {
|
||||
entry:
|
||||
; CHECK: call i16 @dx.op.unary.i16(i32 31, i16 %{{.*}})
|
||||
%elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
|
||||
ret i16 %elt.ctpop
|
||||
}
|
||||
|
||||
define noundef i32 @test_countbits_int(i32 noundef %a) {
|
||||
entry:
|
||||
; CHECK: call i32 @dx.op.unary.i32(i32 31, i32 %{{.*}})
|
||||
%elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
|
||||
ret i32 %elt.ctpop
|
||||
}
|
||||
|
||||
define noundef i64 @test_countbits_long(i64 noundef %a) {
|
||||
entry:
|
||||
; CHECK: call i64 @dx.op.unary.i64(i32 31, i64 %{{.*}})
|
||||
%elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
|
||||
ret i64 %elt.ctpop
|
||||
}
|
||||
|
||||
define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
|
||||
; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee0]])
|
||||
; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
|
||||
; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee1]])
|
||||
; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
|
||||
; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee2]])
|
||||
; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
|
||||
; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee3]])
|
||||
; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
|
||||
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
|
||||
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
|
||||
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
|
||||
%2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
declare i16 @llvm.ctpop.i16(i16)
|
||||
declare i32 @llvm.ctpop.i32(i32)
|
||||
declare i64 @llvm.ctpop.i64(i64)
|
||||
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
|
21
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll
Normal file
21
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
|
||||
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
|
||||
|
||||
; CHECK: OpMemoryModel Logical GLSL450
|
||||
|
||||
; Scalar i32 case: the ctpop intrinsic call must be selected to OpBitCount.
|
||||
define noundef i32 @countbits_i32(i32 noundef %a) {
|
||||
entry:
|
||||
; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]]
|
||||
%elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
|
||||
ret i32 %elt.ctpop
|
||||
}
|
||||
|
||||
define noundef i16 @countbits_i16(i16 noundef %a) {
|
||||
entry:
|
||||
; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]]
|
||||
%elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
|
||||
ret i16 %elt.ctpop
|
||||
}
|
||||
|
||||
declare i16 @llvm.ctpop.i16(i16)
|
||||
declare i32 @llvm.ctpop.i32(i32)
|
Loading…
x
Reference in New Issue
Block a user