[HLSL] Implement elementwise firstbitlow builtin (#116858)
Closes https://github.com/llvm/llvm-project/issues/99116 Implements `firstbitlow` by extracting common functionality from `firstbithigh` into a shared function, while also fixing a bug in an edge case where `u64x3` and larger vectors would attempt to create vectors larger than the SPIR-V maximum of 4 components. --------- Co-authored-by: Steven Perron <stevenperron@google.com>
This commit is contained in:
parent
4a4a8a1476
commit
4f48abff0f
@ -4855,6 +4855,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
|
||||
let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
|
||||
let Attributes = [NoThrow, Const];
|
||||
let Prototype = "void(...)";
|
||||
}
|
||||
|
||||
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
|
||||
let Spellings = ["__builtin_hlsl_elementwise_frac"];
|
||||
let Attributes = [NoThrow, Const];
|
||||
|
@ -19316,7 +19316,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
|
||||
"hlsl.dot4add.u8packed");
|
||||
}
|
||||
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
|
||||
|
||||
Value *X = EmitScalarExpr(E->getArg(0));
|
||||
|
||||
return Builder.CreateIntrinsic(
|
||||
@ -19324,6 +19323,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
|
||||
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
|
||||
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
|
||||
}
|
||||
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
|
||||
Value *X = EmitScalarExpr(E->getArg(0));
|
||||
|
||||
return Builder.CreateIntrinsic(
|
||||
/*ReturnType=*/ConvertType(E->getType()),
|
||||
CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
|
||||
nullptr, "hlsl.firstbitlow");
|
||||
}
|
||||
case Builtin::BI__builtin_hlsl_lerp: {
|
||||
Value *X = EmitScalarExpr(E->getArg(0));
|
||||
Value *Y = EmitScalarExpr(E->getArg(1));
|
||||
|
@ -99,6 +99,7 @@ public:
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
|
||||
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
|
||||
|
@ -1150,6 +1150,78 @@ uint3 firstbithigh(uint64_t3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
|
||||
uint4 firstbithigh(uint64_t4);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// firstbitlow builtins
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// \fn T firstbitlow(T Val)
|
||||
/// \brief Returns the location of the first set bit starting from the lowest
|
||||
/// order bit and working upward, per component.
|
||||
/// \param Val the input value.
|
||||
|
||||
#ifdef __HLSL_ENABLE_16_BIT
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint firstbitlow(int16_t);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint2 firstbitlow(int16_t2);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint3 firstbitlow(int16_t3);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint4 firstbitlow(int16_t4);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint firstbitlow(uint16_t);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint2 firstbitlow(uint16_t2);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint3 firstbitlow(uint16_t3);
|
||||
_HLSL_AVAILABILITY(shadermodel, 6.2)
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint4 firstbitlow(uint16_t4);
|
||||
#endif
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint firstbitlow(int);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint2 firstbitlow(int2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint3 firstbitlow(int3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint4 firstbitlow(int4);
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint firstbitlow(uint);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint2 firstbitlow(uint2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint3 firstbitlow(uint3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint4 firstbitlow(uint4);
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint firstbitlow(int64_t);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint2 firstbitlow(int64_t2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint3 firstbitlow(int64_t3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint4 firstbitlow(int64_t4);
|
||||
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint firstbitlow(uint64_t);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint2 firstbitlow(uint64_t2);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint3 firstbitlow(uint64_t3);
|
||||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
|
||||
uint4 firstbitlow(uint64_t4);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// floor builtins
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2036,7 +2036,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
|
||||
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
|
||||
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
|
||||
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
|
||||
return true;
|
||||
|
||||
|
153
clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
Normal file
153
clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
Normal file
@ -0,0 +1,153 @@
|
||||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
|
||||
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
|
||||
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
|
||||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
|
||||
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
|
||||
// RUN: -emit-llvm -disable-llvm-passes \
|
||||
// RUN: -o - | FileCheck %s -DTARGET=spv
|
||||
|
||||
#ifdef __HLSL_ENABLE_16_BIT
|
||||
// CHECK-LABEL: test_firstbitlow_ushort
|
||||
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
|
||||
uint test_firstbitlow_ushort(uint16_t p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ushort2
|
||||
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
|
||||
uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ushort3
|
||||
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
|
||||
uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ushort4
|
||||
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
|
||||
uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_short
|
||||
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
|
||||
uint test_firstbitlow_short(int16_t p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_short2
|
||||
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
|
||||
uint2 test_firstbitlow_short2(int16_t2 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_short3
|
||||
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
|
||||
uint3 test_firstbitlow_short3(int16_t3 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_short4
|
||||
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
|
||||
uint4 test_firstbitlow_short4(int16_t4 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
#endif // __HLSL_ENABLE_16_BIT
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_uint
|
||||
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
|
||||
uint test_firstbitlow_uint(uint p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_uint2
|
||||
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
|
||||
uint2 test_firstbitlow_uint2(uint2 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_uint3
|
||||
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
|
||||
uint3 test_firstbitlow_uint3(uint3 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_uint4
|
||||
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
|
||||
uint4 test_firstbitlow_uint4(uint4 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ulong
|
||||
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
|
||||
uint test_firstbitlow_ulong(uint64_t p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ulong2
|
||||
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
|
||||
uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ulong3
|
||||
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
|
||||
uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_ulong4
|
||||
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
|
||||
uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_int
|
||||
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
|
||||
uint test_firstbitlow_int(int p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_int2
|
||||
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
|
||||
uint2 test_firstbitlow_int2(int2 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_int3
|
||||
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
|
||||
uint3 test_firstbitlow_int3(int3 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_int4
|
||||
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
|
||||
uint4 test_firstbitlow_int4(int4 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_long
|
||||
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
|
||||
uint test_firstbitlow_long(int64_t p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_long2
|
||||
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
|
||||
uint2 test_firstbitlow_long2(int64_t2 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_long3
|
||||
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
|
||||
uint3 test_firstbitlow_long3(int64_t3 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_firstbitlow_long4
|
||||
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
|
||||
uint4 test_firstbitlow_long4(int64_t4 p0) {
|
||||
return firstbitlow(p0);
|
||||
}
|
@ -17,12 +17,10 @@ double test_int_builtin(double p0) {
|
||||
|
||||
double2 test_int_builtin_2(double2 p0) {
|
||||
return __builtin_hlsl_elementwise_firstbithigh(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers
|
||||
// (was 'double2' (aka 'vector<double, 2>'))}}
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
|
||||
}
|
||||
|
||||
float test_int_builtin_3(float p0) {
|
||||
return __builtin_hlsl_elementwise_firstbithigh(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers
|
||||
// (was 'float')}}
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
|
||||
}
|
||||
|
26
clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
Normal file
26
clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
Normal file
@ -0,0 +1,26 @@
|
||||
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
|
||||
|
||||
int test_too_few_arg() {
|
||||
return firstbitlow();
|
||||
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
|
||||
}
|
||||
|
||||
int test_too_many_arg(int p0) {
|
||||
return firstbitlow(p0, p0);
|
||||
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
|
||||
}
|
||||
|
||||
double test_int_builtin(double p0) {
|
||||
return firstbitlow(p0);
|
||||
// expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
|
||||
}
|
||||
|
||||
double2 test_int_builtin_2(double2 p0) {
|
||||
return __builtin_hlsl_elementwise_firstbitlow(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
|
||||
}
|
||||
|
||||
float test_int_builtin_3(float p0) {
|
||||
return __builtin_hlsl_elementwise_firstbitlow(p0);
|
||||
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
|
||||
}
|
@ -115,6 +115,7 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
|
||||
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
|
||||
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
|
||||
def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
|
||||
}
|
||||
|
@ -113,6 +113,7 @@ let TargetPrefix = "spv" in {
|
||||
|
||||
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
|
||||
def int_spv_resource_updatecounter
|
||||
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
|
||||
|
@ -620,6 +620,18 @@ def CountBits : DXILOp<31, unaryBits> {
|
||||
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
||||
}
|
||||
|
||||
def FirstbitLo : DXILOp<32, unaryBits> {
|
||||
let Doc = "Returns the location of the first set bit starting from "
|
||||
"the lowest order bit and working upward.";
|
||||
let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ];
|
||||
let arguments = [OverloadTy];
|
||||
let result = Int32Ty;
|
||||
let overloads =
|
||||
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
|
||||
let stages = [Stages<DXIL1_0, [all_stages]>];
|
||||
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
||||
}
|
||||
|
||||
def FirstbitHi : DXILOp<33, unaryBits> {
|
||||
let Doc = "Returns the location of the first set bit starting from "
|
||||
"the highest order bit and working downward.";
|
||||
|
@ -45,6 +45,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
|
||||
case Intrinsic::dx_splitdouble:
|
||||
case Intrinsic::dx_firstbituhigh:
|
||||
case Intrinsic::dx_firstbitshigh:
|
||||
case Intrinsic::dx_firstbitlow:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -109,15 +109,25 @@ private:
|
||||
bool selectFirstBitHigh(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, bool IsSigned) const;
|
||||
|
||||
bool selectFirstBitHigh16(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, bool IsSigned) const;
|
||||
bool selectFirstBitLow(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I) const;
|
||||
|
||||
bool selectFirstBitHigh32(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, Register SrcReg,
|
||||
bool IsSigned) const;
|
||||
bool selectFirstBitSet16(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, unsigned ExtendOpcode,
|
||||
unsigned BitSetOpcode) const;
|
||||
|
||||
bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, bool IsSigned) const;
|
||||
bool selectFirstBitSet32(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, Register SrcReg,
|
||||
unsigned BitSetOpcode) const;
|
||||
|
||||
bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, Register SrcReg,
|
||||
unsigned BitSetOpcode, bool SwapPrimarySide) const;
|
||||
|
||||
bool selectFirstBitSet64Overflow(Register ResVReg, const SPIRVType *ResType,
|
||||
MachineInstr &I, Register SrcReg,
|
||||
unsigned BitSetOpcode,
|
||||
bool SwapPrimarySide) const;
|
||||
|
||||
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
|
||||
const MachineInstr *Init = nullptr) const;
|
||||
@ -2952,6 +2962,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
|
||||
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false);
|
||||
case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb
|
||||
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true);
|
||||
case Intrinsic::spv_firstbitlow: // There is no CL equivalent of FindILsb
|
||||
return selectFirstBitLow(ResVReg, ResType, I);
|
||||
case Intrinsic::spv_group_memory_barrier_with_group_sync: {
|
||||
bool Result = true;
|
||||
auto MemSemConstant =
|
||||
@ -3208,136 +3220,249 @@ Register SPIRVInstructionSelector::buildPointerToResource(
|
||||
return AcReg;
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectFirstBitHigh16(Register ResVReg,
|
||||
const SPIRVType *ResType,
|
||||
MachineInstr &I,
|
||||
bool IsSigned) const {
|
||||
unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
|
||||
// zero or sign extend
|
||||
bool SPIRVInstructionSelector::selectFirstBitSet16(
|
||||
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
|
||||
unsigned ExtendOpcode, unsigned BitSetOpcode) const {
|
||||
Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
|
||||
bool Result =
|
||||
selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, Opcode);
|
||||
return Result && selectFirstBitHigh32(ResVReg, ResType, I, ExtReg, IsSigned);
|
||||
bool Result = selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()},
|
||||
ExtendOpcode);
|
||||
|
||||
return Result &&
|
||||
selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode);
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg,
|
||||
const SPIRVType *ResType,
|
||||
MachineInstr &I,
|
||||
Register SrcReg,
|
||||
bool IsSigned) const {
|
||||
unsigned Opcode = IsSigned ? GL::FindSMsb : GL::FindUMsb;
|
||||
bool SPIRVInstructionSelector::selectFirstBitSet32(
|
||||
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
|
||||
Register SrcReg, unsigned BitSetOpcode) const {
|
||||
return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
|
||||
.addDef(ResVReg)
|
||||
.addUse(GR.getSPIRVTypeID(ResType))
|
||||
.addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
|
||||
.addImm(Opcode)
|
||||
.addImm(BitSetOpcode)
|
||||
.addUse(SrcReg)
|
||||
.constrainAllUses(TII, TRI, RBI);
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg,
|
||||
const SPIRVType *ResType,
|
||||
MachineInstr &I,
|
||||
bool IsSigned) const {
|
||||
Register OpReg = I.getOperand(2).getReg();
|
||||
// 1. split our int64 into 2 pieces using a bitcast
|
||||
unsigned count = GR.getScalarOrVectorComponentCount(ResType);
|
||||
SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType);
|
||||
bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
|
||||
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
|
||||
Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
|
||||
|
||||
// SPIR-V allows vectors of size 2, 3, or 4 only. Calling with larger vectors
|
||||
// requires creating a param register and return register with an invalid
|
||||
// vector size. If that is resolved, then this function can be used for
|
||||
// vectors of any component size.
|
||||
unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
|
||||
assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops");
|
||||
|
||||
MachineIRBuilder MIRBuilder(I);
|
||||
SPIRVType *postCastT =
|
||||
GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder);
|
||||
Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
|
||||
bool Result =
|
||||
selectOpWithSrcs(bitcastReg, postCastT, I, {OpReg}, SPIRV::OpBitcast);
|
||||
SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
|
||||
SPIRVType *I64Type = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
|
||||
SPIRVType *I64x2Type = GR.getOrCreateSPIRVVectorType(I64Type, 2, MIRBuilder);
|
||||
SPIRVType *Vec2ResType =
|
||||
GR.getOrCreateSPIRVVectorType(BaseType, 2, MIRBuilder);
|
||||
|
||||
// 2. call firstbithigh
|
||||
Register FBHReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
|
||||
Result &= selectFirstBitHigh32(FBHReg, postCastT, I, bitcastReg, IsSigned);
|
||||
std::vector<Register> PartialRegs;
|
||||
|
||||
// 3. split result vector into high bits and low bits
|
||||
// Loops 0, 2, 4, ... but stops one loop early when ComponentCount is odd
|
||||
unsigned CurrentComponent = 0;
|
||||
for (; CurrentComponent + 1 < ComponentCount; CurrentComponent += 2) {
|
||||
// This register holds the firstbitX result for each of the i64x2 vectors
|
||||
// extracted from SrcReg
|
||||
Register BitSetResult =
|
||||
MRI->createVirtualRegister(GR.getRegClass(I64x2Type));
|
||||
|
||||
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
||||
TII.get(SPIRV::OpVectorShuffle))
|
||||
.addDef(BitSetResult)
|
||||
.addUse(GR.getSPIRVTypeID(I64x2Type))
|
||||
.addUse(SrcReg)
|
||||
.addUse(SrcReg)
|
||||
.addImm(CurrentComponent)
|
||||
.addImm(CurrentComponent + 1);
|
||||
|
||||
if (!MIB.constrainAllUses(TII, TRI, RBI))
|
||||
return false;
|
||||
|
||||
Register SubVecBitSetReg =
|
||||
MRI->createVirtualRegister(GR.getRegClass(Vec2ResType));
|
||||
|
||||
if (!selectFirstBitSet64(SubVecBitSetReg, Vec2ResType, I, BitSetResult,
|
||||
BitSetOpcode, SwapPrimarySide))
|
||||
return false;
|
||||
|
||||
PartialRegs.push_back(SubVecBitSetReg);
|
||||
}
|
||||
|
||||
// On odd component counts we need to handle one more component
|
||||
if (CurrentComponent != ComponentCount) {
|
||||
bool ZeroAsNull = STI.isOpenCLEnv();
|
||||
Register FinalElemReg = MRI->createVirtualRegister(GR.getRegClass(I64Type));
|
||||
Register ConstIntLastIdx = GR.getOrCreateConstInt(
|
||||
ComponentCount - 1, I, BaseType, TII, ZeroAsNull);
|
||||
|
||||
if (!selectOpWithSrcs(FinalElemReg, I64Type, I, {SrcReg, ConstIntLastIdx},
|
||||
SPIRV::OpVectorExtractDynamic))
|
||||
return false;
|
||||
|
||||
Register FinalElemBitSetReg =
|
||||
MRI->createVirtualRegister(GR.getRegClass(BaseType));
|
||||
|
||||
if (!selectFirstBitSet64(FinalElemBitSetReg, BaseType, I, FinalElemReg,
|
||||
BitSetOpcode, SwapPrimarySide))
|
||||
return false;
|
||||
|
||||
PartialRegs.push_back(FinalElemBitSetReg);
|
||||
}
|
||||
|
||||
// Join all the resulting registers back into the return type in order
|
||||
// (i.e. i32x2, i32x2, i32x1 -> i32x5)
|
||||
return selectOpWithSrcs(ResVReg, ResType, I, PartialRegs,
|
||||
SPIRV::OpCompositeConstruct);
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectFirstBitSet64(
|
||||
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
|
||||
Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
|
||||
unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
|
||||
SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
|
||||
bool ZeroAsNull = STI.isOpenCLEnv();
|
||||
Register ConstIntZero =
|
||||
GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
|
||||
Register ConstIntOne =
|
||||
GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
|
||||
|
||||
// SPIRV doesn't support vectors with more than 4 components. Since the
|
||||
// algorithm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
|
||||
// operate on vectors with 2 or fewer components. When larger vectors are
|
||||
// seen, split them, recurse, then recombine them.
|
||||
if (ComponentCount > 2) {
|
||||
return selectFirstBitSet64Overflow(ResVReg, ResType, I, SrcReg,
|
||||
BitSetOpcode, SwapPrimarySide);
|
||||
}
|
||||
|
||||
// 1. Split int64 into 2 pieces using a bitcast
|
||||
MachineIRBuilder MIRBuilder(I);
|
||||
SPIRVType *PostCastType =
|
||||
GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
|
||||
Register BitcastReg =
|
||||
MRI->createVirtualRegister(GR.getRegClass(PostCastType));
|
||||
|
||||
if (!selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg},
|
||||
SPIRV::OpBitcast))
|
||||
return false;
|
||||
|
||||
// 2. Find the first set bit from the primary side for all the pieces in #1
|
||||
Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
|
||||
if (!selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg, BitSetOpcode))
|
||||
return false;
|
||||
|
||||
// 3. Split result vector into high bits and low bits
|
||||
Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
|
||||
Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
|
||||
|
||||
bool ZeroAsNull = STI.isOpenCLEnv();
|
||||
bool isScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
|
||||
if (isScalarRes) {
|
||||
bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
|
||||
if (IsScalarRes) {
|
||||
// if scalar do a vector extract
|
||||
Result &= selectOpWithSrcs(
|
||||
HighReg, ResType, I,
|
||||
{FBHReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
|
||||
SPIRV::OpVectorExtractDynamic);
|
||||
Result &= selectOpWithSrcs(
|
||||
LowReg, ResType, I,
|
||||
{FBHReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
|
||||
SPIRV::OpVectorExtractDynamic);
|
||||
} else { // vector case do a shufflevector
|
||||
if (!selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero},
|
||||
SPIRV::OpVectorExtractDynamic))
|
||||
return false;
|
||||
if (!selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne},
|
||||
SPIRV::OpVectorExtractDynamic))
|
||||
return false;
|
||||
} else {
|
||||
// if vector do a shufflevector
|
||||
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
||||
TII.get(SPIRV::OpVectorShuffle))
|
||||
.addDef(HighReg)
|
||||
.addUse(GR.getSPIRVTypeID(ResType))
|
||||
.addUse(FBHReg)
|
||||
.addUse(FBHReg);
|
||||
// ^^ this vector will not be selected from; could be empty
|
||||
unsigned j;
|
||||
for (j = 0; j < count * 2; j += 2) {
|
||||
MIB.addImm(j);
|
||||
}
|
||||
Result &= MIB.constrainAllUses(TII, TRI, RBI);
|
||||
.addUse(FBSReg)
|
||||
// Per the spec, repeat the vector if only one vec is needed
|
||||
.addUse(FBSReg);
|
||||
|
||||
// high bits are stored in even indexes. Extract them from FBSReg
|
||||
for (unsigned J = 0; J < ComponentCount * 2; J += 2) {
|
||||
MIB.addImm(J);
|
||||
}
|
||||
|
||||
if (!MIB.constrainAllUses(TII, TRI, RBI))
|
||||
return false;
|
||||
|
||||
// get low bits
|
||||
MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
||||
TII.get(SPIRV::OpVectorShuffle))
|
||||
.addDef(LowReg)
|
||||
.addUse(GR.getSPIRVTypeID(ResType))
|
||||
.addUse(FBHReg)
|
||||
.addUse(FBHReg);
|
||||
// ^^ this vector will not be selected from; could be empty
|
||||
for (j = 1; j < count * 2; j += 2) {
|
||||
MIB.addImm(j);
|
||||
.addUse(FBSReg)
|
||||
// Per the spec, repeat the vector if only one vec is needed
|
||||
.addUse(FBSReg);
|
||||
|
||||
// low bits are stored in odd indexes. Extract them from FBSReg
|
||||
for (unsigned J = 1; J < ComponentCount * 2; J += 2) {
|
||||
MIB.addImm(J);
|
||||
}
|
||||
Result &= MIB.constrainAllUses(TII, TRI, RBI);
|
||||
if (!MIB.constrainAllUses(TII, TRI, RBI))
|
||||
return false;
|
||||
}
|
||||
|
||||
// 4. check if result of each top 32 bits is == -1
|
||||
// 4. Check the result. When primary bits == -1 use secondary, otherwise use
|
||||
// primary
|
||||
SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
|
||||
Register NegOneReg;
|
||||
Register Reg0;
|
||||
Register Reg32;
|
||||
unsigned selectOp;
|
||||
unsigned addOp;
|
||||
if (isScalarRes) {
|
||||
unsigned SelectOp;
|
||||
unsigned AddOp;
|
||||
|
||||
if (IsScalarRes) {
|
||||
NegOneReg =
|
||||
GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull);
|
||||
Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull);
|
||||
Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull);
|
||||
selectOp = SPIRV::OpSelectSISCond;
|
||||
addOp = SPIRV::OpIAddS;
|
||||
SelectOp = SPIRV::OpSelectSISCond;
|
||||
AddOp = SPIRV::OpIAddS;
|
||||
} else {
|
||||
BoolType = GR.getOrCreateSPIRVVectorType(BoolType, count, MIRBuilder);
|
||||
BoolType =
|
||||
GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder);
|
||||
NegOneReg =
|
||||
GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull);
|
||||
Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull);
|
||||
Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull);
|
||||
selectOp = SPIRV::OpSelectVIVCond;
|
||||
addOp = SPIRV::OpIAddV;
|
||||
SelectOp = SPIRV::OpSelectVIVCond;
|
||||
AddOp = SPIRV::OpIAddV;
|
||||
}
|
||||
|
||||
// check if the high bits are == -1; true if -1
|
||||
Register PrimaryReg = HighReg;
|
||||
Register SecondaryReg = LowReg;
|
||||
Register PrimaryShiftReg = Reg32;
|
||||
Register SecondaryShiftReg = Reg0;
|
||||
|
||||
// By default the emitted opcodes check for the set bit from the MSB side.
|
||||
// Setting SwapPrimarySide checks the set bit from the LSB side
|
||||
if (SwapPrimarySide) {
|
||||
PrimaryReg = LowReg;
|
||||
SecondaryReg = HighReg;
|
||||
PrimaryShiftReg = Reg0;
|
||||
SecondaryShiftReg = Reg32;
|
||||
}
|
||||
|
||||
// Check if the primary bits are == -1
|
||||
Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType));
|
||||
Result &= selectOpWithSrcs(BReg, BoolType, I, {HighReg, NegOneReg},
|
||||
SPIRV::OpIEqual);
|
||||
if (!selectOpWithSrcs(BReg, BoolType, I, {PrimaryReg, NegOneReg},
|
||||
SPIRV::OpIEqual))
|
||||
return false;
|
||||
|
||||
// Select low bits if true in BReg, otherwise high bits
|
||||
// Select secondary bits if true in BReg, otherwise primary bits
|
||||
Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
|
||||
Result &=
|
||||
selectOpWithSrcs(TmpReg, ResType, I, {BReg, LowReg, HighReg}, selectOp);
|
||||
if (!selectOpWithSrcs(TmpReg, ResType, I, {BReg, SecondaryReg, PrimaryReg},
|
||||
SelectOp))
|
||||
return false;
|
||||
|
||||
// Add 32 for high bits, 0 for low bits
|
||||
// 5. Add 32 when high bits are used, otherwise 0 for low bits
|
||||
Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
|
||||
Result &= selectOpWithSrcs(ValReg, ResType, I, {BReg, Reg0, Reg32}, selectOp);
|
||||
if (!selectOpWithSrcs(ValReg, ResType, I,
|
||||
{BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp))
|
||||
return false;
|
||||
|
||||
return Result &&
|
||||
selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, addOp);
|
||||
return selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp);
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
|
||||
@ -3347,20 +3472,49 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
|
||||
// FindUMsb and FindSMsb intrinsics only support 32 bit integers
|
||||
Register OpReg = I.getOperand(2).getReg();
|
||||
SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg);
|
||||
// zero or sign extend
|
||||
unsigned ExtendOpcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
|
||||
unsigned BitSetOpcode = IsSigned ? GL::FindSMsb : GL::FindUMsb;
|
||||
|
||||
switch (GR.getScalarOrVectorBitWidth(OpType)) {
|
||||
case 16:
|
||||
return selectFirstBitHigh16(ResVReg, ResType, I, IsSigned);
|
||||
return selectFirstBitSet16(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode);
|
||||
case 32:
|
||||
return selectFirstBitHigh32(ResVReg, ResType, I, OpReg, IsSigned);
|
||||
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
|
||||
case 64:
|
||||
return selectFirstBitHigh64(ResVReg, ResType, I, IsSigned);
|
||||
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
|
||||
/*SwapPrimarySide=*/false);
|
||||
default:
|
||||
report_fatal_error(
|
||||
"spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
|
||||
}
|
||||
}
|
||||
|
||||
// Selects the lowering for the spv_firstbitlow intrinsic. The source operand
// is operand 2 of I; its scalar-or-vector bit width picks which of the
// selectFirstBitSet16/32/64 helpers emits the GL::FindILsb based sequence.
// Returns whatever the chosen helper returns; any width other than 16, 32 or
// 64 bits is reported as a fatal error.
bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
|
||||
const SPIRVType *ResType,
|
||||
MachineInstr &I) const {
|
||||
// The FindILsb intrinsic only supports 32-bit integers.
|
||||
Register OpReg = I.getOperand(2).getReg();
|
||||
SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg);
|
||||
// OpUConvert treats the operand bits as an unsigned i16 and zero extends it
|
||||
// to an unsigned i32. This leaves all the least significant bits unchanged,
|
||||
// so the first set bit from the LSB side doesn't change.
|
||||
unsigned ExtendOpcode = SPIRV::OpUConvert;
|
||||
unsigned BitSetOpcode = GL::FindILsb;
|
||||
|
||||
switch (GR.getScalarOrVectorBitWidth(OpType)) {
|
||||
case 16:
|
||||
return selectFirstBitSet16(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode);
|
||||
case 32:
|
||||
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
|
||||
case 64:
|
||||
// SwapPrimarySide=true asks the 64-bit helper to look for the set bit from
// the LSB side instead of its default MSB side.
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
|
||||
/*SwapPrimarySide=*/true);
|
||||
default:
|
||||
report_fatal_error("spv_firstbitlow only supports 16,32,64 bits.");
|
||||
}
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg,
|
||||
const SPIRVType *ResType,
|
||||
MachineInstr &I) const {
|
||||
|
47
llvm/test/CodeGen/DirectX/firstbitlow.ll
Normal file
47
llvm/test/CodeGen/DirectX/firstbitlow.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||
|
||||
; Make sure dxil operation function calls for firstbitlow are generated for all integer types.
|
||||
|
||||
define noundef i32 @test_firstbitlow_short(i16 noundef %a) {
|
||||
entry:
|
||||
; CHECK: call i32 @dx.op.unaryBits.i16(i32 32, i16 %{{.*}})
|
||||
%elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i16(i16 %a)
|
||||
ret i32 %elt.firstbitlow
|
||||
}
|
||||
|
||||
define noundef i32 @test_firstbitlow_int(i32 noundef %a) {
|
||||
entry:
|
||||
; CHECK: call i32 @dx.op.unaryBits.i32(i32 32, i32 %{{.*}})
|
||||
%elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i32(i32 %a)
|
||||
ret i32 %elt.firstbitlow
|
||||
}
|
||||
|
||||
define noundef i32 @test_firstbitlow_long(i64 noundef %a) {
|
||||
entry:
|
||||
; CHECK: call i32 @dx.op.unaryBits.i64(i32 32, i64 %{{.*}})
|
||||
%elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i64(i64 %a)
|
||||
ret i32 %elt.firstbitlow
|
||||
}
|
||||
|
||||
define noundef <4 x i32> @test_firstbitlow_vec4_i32(<4 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
|
||||
; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee0]])
|
||||
; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
|
||||
; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee1]])
|
||||
; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
|
||||
; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee2]])
|
||||
; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
|
||||
; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee3]])
|
||||
; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
|
||||
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
|
||||
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
|
||||
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
|
||||
%2 = call <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
declare i32 @llvm.dx.firstbitlow.i16(i16)
|
||||
declare i32 @llvm.dx.firstbitlow.i32(i32)
|
||||
declare i32 @llvm.dx.firstbitlow.i64(i64)
|
||||
declare <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32>)
|
10
llvm/test/CodeGen/DirectX/firstbitlow_error.ll
Normal file
10
llvm/test/CodeGen/DirectX/firstbitlow_error.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
|
||||
|
||||
; DXIL operation firstbitlow does not support double overload type
|
||||
; CHECK: invalid intrinsic signature
|
||||
|
||||
define noundef double @firstbitlow_double(double noundef %a) {
|
||||
entry:
|
||||
%1 = call double @llvm.dx.firstbitlow.f64(double %a)
|
||||
ret double %1
|
||||
}
|
@ -1,94 +1,261 @@
|
||||
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
|
||||
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
|
||||
|
||||
; CHECK: OpMemoryModel Logical GLSL450
|
||||
; CHECK-DAG: [[Z:%.*]] = OpConstant %[[#]] 0
|
||||
; CHECK-DAG: [[X:%.*]] = OpConstant %[[#]] 1
|
||||
; CHECK-DAG: [[glsl_450_ext:%.+]] = OpExtInstImport "GLSL.std.450"
|
||||
; CHECK-DAG: OpMemoryModel Logical GLSL450
|
||||
; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
|
||||
; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
|
||||
; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3
|
||||
; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
|
||||
; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0
|
||||
; CHECK-DAG: [[const_2:%.*]] = OpConstant [[u32_t]] 2
|
||||
; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]]
|
||||
; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1
|
||||
; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32
|
||||
; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]]
|
||||
; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295
|
||||
; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]]
|
||||
; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
|
||||
; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
|
||||
; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3
|
||||
; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4
|
||||
; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
|
||||
; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2
|
||||
; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3
|
||||
; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4
|
||||
; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool
|
||||
; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_i32
|
||||
define noundef i32 @firstbituhigh_i32(i32 noundef %a) {
|
||||
entry:
|
||||
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]]
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindUMsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i32(i32 %a)
|
||||
ret i32 %elt.firstbituhigh
|
||||
}
|
||||
|
||||
define noundef <2 x i32> @firstbituhigh_2xi32(<2 x i32> noundef %a) {
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v2xi32
|
||||
define noundef <2 x i32> @firstbituhigh_v2xi32(<2 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]]
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i32(<2 x i32> %a)
|
||||
ret <2 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v3xi32
|
||||
define noundef <3 x i32> @firstbituhigh_v3xi32(<3 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindUMsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i32(<3 x i32> %a)
|
||||
ret <3 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v4xi32
|
||||
define noundef <4 x i32> @firstbituhigh_v4xi32(<4 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_i16
|
||||
define noundef i32 @firstbituhigh_i16(i16 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[A:%.*]] = OpUConvert %[[#]]
|
||||
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]]
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindUMsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i16(i16 %a)
|
||||
ret i32 %elt.firstbituhigh
|
||||
}
|
||||
|
||||
define noundef <2 x i32> @firstbituhigh_v2i16(<2 x i16> noundef %a) {
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v2xi16
|
||||
define noundef <2 x i32> @firstbituhigh_v2xi16(<2 x i16> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[A:%.*]] = OpUConvert %[[#]]
|
||||
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]]
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i16(<2 x i16> %a)
|
||||
ret <2 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v3xi16
|
||||
define noundef <3 x i32> @firstbituhigh_v3xi16(<3 x i16> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindUMsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i16(<3 x i16> %a)
|
||||
ret <3 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v4xi16
|
||||
define noundef <4 x i32> @firstbituhigh_v4xi16(<4 x i16> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i16(<4 x i16> %a)
|
||||
ret <4 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_i64
|
||||
define noundef i32 @firstbituhigh_i64(i64 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]]
|
||||
; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]]
|
||||
; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]]
|
||||
; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]]
|
||||
; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]]
|
||||
; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]]
|
||||
; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]]
|
||||
; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]]
|
||||
; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
|
||||
; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
|
||||
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a32x2]]
|
||||
; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]]
|
||||
; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]]
|
||||
; CHECK: [[should_use_low:%.+]] = OpIEqual [[bool_t]] [[high_bits]] [[const_neg1]]
|
||||
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[low_bits]] [[high_bits]]
|
||||
; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[const_0]] [[const_32]]
|
||||
; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i64(i64 %a)
|
||||
ret i32 %elt.firstbituhigh
|
||||
}
|
||||
|
||||
define noundef <2 x i32> @firstbituhigh_v2i64(<2 x i64> noundef %a) {
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v2xi64
|
||||
define noundef <2 x i32> @firstbituhigh_v2xi64(<2 x i64> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]]
|
||||
; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]]
|
||||
; CHECK: [[M:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 0
|
||||
; CHECK: [[L:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 1
|
||||
; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]]
|
||||
; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]]
|
||||
; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]]
|
||||
; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]]
|
||||
; CHECK: OpReturnValue [[B]]
|
||||
; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]]
|
||||
; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]]
|
||||
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a32x4]]
|
||||
; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2
|
||||
; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3
|
||||
; CHECK: [[should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[high_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_low]] [[low_bits]] [[high_bits]]
|
||||
; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_low]] [[const_0x2]] [[const_32x2]]
|
||||
; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i64(<2 x i64> %a)
|
||||
ret <2 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v3xi64
|
||||
define noundef <3 x i32> @firstbituhigh_v3xi64(<3 x i64> noundef %a) {
|
||||
entry:
|
||||
; Preamble
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]]
|
||||
|
||||
; Extract first 2 components from %a
|
||||
; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
|
||||
|
||||
; Do firstbituhigh on the first 2 components
|
||||
; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]]
|
||||
; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[pt1_cast]]
|
||||
; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2
|
||||
; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3
|
||||
; CHECK: [[pt1_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[pt1_high_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[pt1_low_bits]] [[pt1_high_bits]]
|
||||
; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[const_0x2]] [[const_32x2]]
|
||||
; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]]
|
||||
|
||||
; Extract the last component from %a
|
||||
; CHECK: [[pt2:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_2]]
|
||||
|
||||
; Do firstbituhigh on the last component
|
||||
; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x2_t]] [[pt2]]
|
||||
; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[pt2_cast]]
|
||||
; CHECK: [[pt2_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_0]]
|
||||
; CHECK: [[pt2_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_1]]
|
||||
; CHECK: [[pt2_should_use_low:%.+]] = OpIEqual [[bool_t]] [[pt2_high_bits]] [[const_neg1]]
|
||||
; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_low]] [[pt2_low_bits]] [[pt2_high_bits]]
|
||||
; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_low]] [[const_0]] [[const_32]]
|
||||
; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32_t]] [[pt2_ans_offset]] [[pt2_ans_bits]]
|
||||
|
||||
; Merge the parts into the final i32x3 and return it
|
||||
; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[pt1_res]] [[pt2_res]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i64(<3 x i64> %a)
|
||||
ret <3 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbituhigh_v4xi64
|
||||
define noundef <4 x i32> @firstbituhigh_v4xi64(<4 x i64> noundef %a) {
|
||||
entry:
|
||||
; Preamble
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]]
|
||||
|
||||
; Extract first 2 components from %a
|
||||
; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
|
||||
|
||||
; Do firstbituhigh on the first 2 components
|
||||
; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]]
|
||||
; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[pt1_cast]]
|
||||
; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2
|
||||
; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3
|
||||
; CHECK: [[pt1_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[pt1_high_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[pt1_low_bits]] [[pt1_high_bits]]
|
||||
; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[const_0x2]] [[const_32x2]]
|
||||
; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]]
|
||||
|
||||
; Extract last 2 components from %a
|
||||
; CHECK: [[pt2:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3
|
||||
|
||||
; Do firstbituhigh on the last 2 components
|
||||
; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt2]]
|
||||
; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[pt2_cast]]
|
||||
; CHECK: [[pt2_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 0 2
|
||||
; CHECK: [[pt2_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 1 3
|
||||
; CHECK: [[pt2_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[pt2_high_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_low]] [[pt2_low_bits]] [[pt2_high_bits]]
|
||||
; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_low]] [[const_0x2]] [[const_32x2]]
|
||||
; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32x2_t]] [[pt2_ans_offset]] [[pt2_ans_bits]]
|
||||
|
||||
; Merge the parts into the final i32x4 and return it
|
||||
; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[pt1_res]] [[pt2_res]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i64(<4 x i64> %a)
|
||||
ret <4 x i32> %elt.firstbituhigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitshigh_i32
|
||||
define noundef i32 @firstbitshigh_i32(i32 noundef %a) {
|
||||
entry:
|
||||
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]]
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindSMsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i32(i32 %a)
|
||||
ret i32 %elt.firstbitshigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitshigh_i16
|
||||
define noundef i32 @firstbitshigh_i16(i16 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[A:%.*]] = OpSConvert %[[#]]
|
||||
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]]
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]]
|
||||
; CHECK: [[a32:%.+]] = OpSConvert [[u32_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindSMsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i16(i16 %a)
|
||||
ret i32 %elt.firstbitshigh
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitshigh_i64
|
||||
define noundef i32 @firstbitshigh_i64(i64 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]]
|
||||
; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindSMsb [[O]]
|
||||
; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]]
|
||||
; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]]
|
||||
; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]]
|
||||
; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]]
|
||||
; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]]
|
||||
; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]]
|
||||
; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
|
||||
; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
|
||||
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindSMsb [[a32x2]]
|
||||
; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]]
|
||||
; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]]
|
||||
; CHECK: [[should_use_low:%.+]] = OpIEqual [[bool_t]] [[high_bits]] [[const_neg1]]
|
||||
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[low_bits]] [[high_bits]]
|
||||
; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[const_0]] [[const_32]]
|
||||
; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i64(i64 %a)
|
||||
ret i32 %elt.firstbitshigh
|
||||
}
|
||||
|
230
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
Normal file
230
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll
Normal file
@ -0,0 +1,230 @@
|
||||
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
|
||||
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
|
||||
|
||||
; CHECK-DAG: [[glsl_450_ext:%.+]] = OpExtInstImport "GLSL.std.450"
|
||||
; CHECK-DAG: OpMemoryModel Logical GLSL450
|
||||
; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
|
||||
; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
|
||||
; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3
|
||||
; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
|
||||
; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0
|
||||
; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]]
|
||||
; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1
|
||||
; CHECK-DAG: [[const_2:%.*]] = OpConstant [[u32_t]] 2
|
||||
; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32
|
||||
; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]]
|
||||
; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295
|
||||
; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]]
|
||||
; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
|
||||
; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
|
||||
; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3
|
||||
; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4
|
||||
; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
|
||||
; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2
|
||||
; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3
|
||||
; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4
|
||||
; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool
|
||||
; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_i32
|
||||
define noundef i32 @firstbitlow_i32(i32 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindILsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i32(i32 %a)
|
||||
ret i32 %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v2xi32
|
||||
define noundef <2 x i32> @firstbitlow_v2xi32(<2 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i32(<2 x i32> %a)
|
||||
ret <2 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v3xi32
|
||||
define noundef <3 x i32> @firstbitlow_v3xi32(<3 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i32(<3 x i32> %a)
|
||||
ret <3 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v4xi32
|
||||
define noundef <4 x i32> @firstbitlow_v4xi32(<4 x i32> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_i16
|
||||
define noundef i32 @firstbitlow_i16(i16 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindILsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i16(i16 %a)
|
||||
ret i32 %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v2xi16
|
||||
define noundef <2 x i32> @firstbitlow_v2xi16(<2 x i16> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i16(<2 x i16> %a)
|
||||
ret <2 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v3xi16
|
||||
define noundef <3 x i32> @firstbitlow_v3xi16(<3 x i16> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i16(<3 x i16> %a)
|
||||
ret <3 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v4xi16
|
||||
define noundef <4 x i32> @firstbitlow_v4xi16(<4 x i16> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]]
|
||||
; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]]
|
||||
; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i16(<4 x i16> %a)
|
||||
ret <4 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_i64
|
||||
define noundef i32 @firstbitlow_i64(i64 noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]]
|
||||
; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]]
|
||||
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32x2]]
|
||||
; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]]
|
||||
; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]]
|
||||
; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg1]]
|
||||
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[high_bits]] [[low_bits]]
|
||||
; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_32]] [[const_0]]
|
||||
; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i64(i64 %a)
|
||||
ret i32 %elt.firstbitlow
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Begin function firstbitlow_v2xi64
|
||||
define noundef <2 x i32> @firstbitlow_v2xi64(<2 x i64> noundef %a) {
|
||||
entry:
|
||||
; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]]
|
||||
; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]]
|
||||
; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32x4]]
|
||||
; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2
|
||||
; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3
|
||||
; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[high_bits]] [[low_bits]]
|
||||
; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_32x2]] [[const_0x2]]
|
||||
; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i64(<2 x i64> %a)
|
||||
ret <2 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; Test: firstbitlow on a <3 x i64> argument, producing a <3 x i32> result.
; The expected SPIR-V never bitcasts the whole u64x3 value (that would need a
; six-component 32-bit vector, above the SPIR-V maximum of 4 noted in the
; commit description). Instead the input is split into a u64x2 part and a
; scalar u64 part; each part goes through the bitcast / FindILsb / select / add
; sequence on its own, and the two results are merged by OpCompositeConstruct.
; CHECK-LABEL: Begin function firstbitlow_v3xi64
|
||||
define noundef <3 x i32> @firstbitlow_v3xi64(<3 x i64> noundef %a) {
|
||||
entry:
|
||||
; Preamble
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]]
|
||||
|
||||
; Extract first 2 components from %a
|
||||
; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
|
||||
|
||||
; Do firstbitlow on the first 2 components
|
||||
; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]]
|
||||
; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[pt1_cast]]
|
||||
; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2
|
||||
; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3
|
||||
; CHECK: [[pt1_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[pt1_low_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[pt1_high_bits]] [[pt1_low_bits]]
|
||||
; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[const_32x2]] [[const_0x2]]
|
||||
; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]]
|
||||
|
||||
; Extract the last component from %a
|
||||
; CHECK: [[pt2:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_2]]
|
||||
|
||||
; Do firstbitlow on the last component
|
||||
; (scalar form: the two 32-bit halves are read with OpVectorExtractDynamic at
; indices const_0 and const_1 instead of with OpVectorShuffle)
; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x2_t]] [[pt2]]
|
||||
; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[pt2_cast]]
|
||||
; CHECK: [[pt2_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_0]]
|
||||
; CHECK: [[pt2_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_1]]
|
||||
; CHECK: [[pt2_should_use_high:%.+]] = OpIEqual [[bool_t]] [[pt2_low_bits]] [[const_neg1]]
|
||||
; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_high]] [[pt2_high_bits]] [[pt2_low_bits]]
|
||||
; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_high]] [[const_32]] [[const_0]]
|
||||
; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32_t]] [[pt2_ans_offset]] [[pt2_ans_bits]]
|
||||
|
||||
; Merge the parts into the final i32x3 and return it
|
||||
; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[pt1_res]] [[pt2_res]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i64(<3 x i64> %a)
|
||||
ret <3 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
; Test: firstbitlow on a <4 x i64> argument, producing a <4 x i32> result.
; The expected SPIR-V splits the u64x4 input into two u64x2 halves so that no
; bitcast needs more than four 32-bit components (the SPIR-V vector maximum
; noted in the commit description). Each half goes through the same
; bitcast / FindILsb / shuffle / select / add sequence, and the two u32x2
; results are merged by OpCompositeConstruct.
; CHECK-LABEL: Begin function firstbitlow_v4xi64
|
||||
define noundef <4 x i32> @firstbitlow_v4xi64(<4 x i64> noundef %a) {
|
||||
entry:
|
||||
; Preamble
|
||||
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]]
|
||||
|
||||
; Extract first 2 components from %a
|
||||
; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
|
||||
|
||||
; Do firstbitlow on the first 2 components
|
||||
; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]]
|
||||
; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[pt1_cast]]
|
||||
; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2
|
||||
; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3
|
||||
; CHECK: [[pt1_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[pt1_low_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[pt1_high_bits]] [[pt1_low_bits]]
|
||||
; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[const_32x2]] [[const_0x2]]
|
||||
; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]]
|
||||
|
||||
; Extract last 2 components from %a
|
||||
; CHECK: [[pt2:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3
|
||||
|
||||
; Do firstbitlow on the last 2 components
|
||||
; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt2]]
|
||||
; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[pt2_cast]]
|
||||
; CHECK: [[pt2_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 0 2
|
||||
; CHECK: [[pt2_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 1 3
|
||||
; CHECK: [[pt2_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[pt2_low_bits]] [[const_neg1x2]]
|
||||
; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_high]] [[pt2_high_bits]] [[pt2_low_bits]]
|
||||
; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_high]] [[const_32x2]] [[const_0x2]]
|
||||
; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32x2_t]] [[pt2_ans_offset]] [[pt2_ans_bits]]
|
||||
|
||||
; Merge the parts into the final i32x4 and return it
|
||||
; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[pt1_res]] [[pt2_res]]
|
||||
; CHECK: OpReturnValue [[ret]]
|
||||
%elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i64(<4 x i64> %a)
|
||||
ret <4 x i32> %elt.firstbitlow
|
||||
}
|
||||
|
||||
;declare i32 @llvm.spv.firstbitlow.i16(i16)
|
||||
;declare i32 @llvm.spv.firstbitlow.i32(i32)
|
||||
;declare i32 @llvm.spv.firstbitlow.i64(i64)
|
||||
;declare <2 x i32> @llvm.spv.firstbitlow.v2i16(<2 x i16>)
|
||||
;declare <2 x i32> @llvm.spv.firstbitlow.v2i32(<2 x i32>)
|
||||
;declare <2 x i32> @llvm.spv.firstbitlow.v2i64(<2 x i64>)
|
Loading…
x
Reference in New Issue
Block a user