[HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op (#127137)
Fixes #99205. - Implements the HLSL intrinsic `AddUint64` used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit types - The LLVM intrinsic `uadd_with_overflow` is used in the implementation of `AddUint64` in `CGBuiltin.cpp` - The DXIL op `UAddc` was defined in `DXIL.td`, and a lowering of the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op was implemented in `DXILOpLowering.cpp` Notes: - `__builtin_addc` was not able to be used to implement `AddUint64` in `hlsl_intrinsics.h` because its `CarryOut` argument is a pointer, and pointers are not supported in HLSL - A lowering of the LLVM intrinsic `uadd_with_overflow` to SPIR-V [already exists](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll) - When lowering the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op, the anonymous struct type `{ i32, i1 }` is replaced with a named struct type `%dx.types.i32c`. This aspect of the implementation may be changed when issue #113192 gets addressed - Fixes issues mentioned in the comments on the original PR #125319 --------- Co-authored-by: Finn Plummer <50529406+inbelic@users.noreply.github.com> Co-authored-by: Farzon Lotfi <farzonlotfi@microsoft.com> Co-authored-by: Chris B <beanz@abolishcrlf.org> Co-authored-by: Justin Bogner <mail@justinbogner.com>
This commit is contained in:
parent
5b3ba261c4
commit
b4ecebe745
@ -4765,6 +4765,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// HLSL
|
// HLSL
|
||||||
|
// Builtin behind the HLSL `AddUint64` intrinsic (spelled
// `__builtin_hlsl_adduint64`). The variadic "void(...)" prototype carries no
// parameter or result types: SemaHLSL::CheckBuiltinFunctionCall validates the
// two arguments and sets the call's result type to the argument type.
def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
|
||||||
|
let Spellings = ["__builtin_hlsl_adduint64"];
|
||||||
|
let Attributes = [NoThrow, Const];
|
||||||
|
let Prototype = "void(...)";
|
||||||
|
}
|
||||||
|
|
||||||
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
|
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
|
||||||
let Spellings = ["__builtin_hlsl_resource_getpointer"];
|
let Spellings = ["__builtin_hlsl_resource_getpointer"];
|
||||||
let Attributes = [NoThrow];
|
let Attributes = [NoThrow];
|
||||||
|
@ -10709,6 +10709,11 @@ def err_vector_incorrect_num_elements : Error<
|
|||||||
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
|
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
|
||||||
def err_altivec_empty_initializer : Error<"expected initializer">;
|
def err_altivec_empty_initializer : Error<"expected initializer">;
|
||||||
|
|
||||||
|
def err_vector_incorrect_bit_count : Error<
|
||||||
|
"incorrect number of bits in vector operand (expected %select{|a multiple of}0 %1 bits, have %2)">;
|
||||||
|
def err_integer_incorrect_bit_count : Error<
|
||||||
|
"incorrect number of bits in integer (expected %0 bits, have %1)">;
|
||||||
|
|
||||||
def err_invalid_neon_type_code : Error<
|
def err_invalid_neon_type_code : Error<
|
||||||
"incompatible constant for this __builtin_neon function">;
|
"incompatible constant for this __builtin_neon function">;
|
||||||
def err_argument_invalid_range : Error<
|
def err_argument_invalid_range : Error<
|
||||||
|
@ -19470,6 +19470,62 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
switch (BuiltinID) {
|
switch (BuiltinID) {
|
||||||
|
case Builtin::BI__builtin_hlsl_adduint64: {
  // AddUint64 adds 64-bit unsigned values that are stored as (low, high)
  // pairs of 32-bit words: one pair for a 2-element operand, two interleaved
  // pairs (low0, high0, low1, high1) for a 4-element operand. The low words
  // are added with uadd_with_overflow and the resulting carry is folded into
  // the sum of the high words.
  Value *LHS = EmitScalarExpr(E->getArg(0));
  Value *RHS = EmitScalarExpr(E->getArg(1));

  QualType Arg0Ty = E->getArg(0)->getType();
  uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
  assert(Arg0Ty == E->getArg(1)->getType() &&
         "AddUint64 operand types must match");
  assert(Arg0Ty->hasIntegerRepresentation() &&
         "AddUint64 operands must have an integer representation");
  assert((NumElements == 2 || NumElements == 4) &&
         "AddUint64 operands must have 2 or 4 elements");

  const bool HasTwoPairs = NumElements == 4;
  constexpr uint64_t LowIdx = 0;
  constexpr uint64_t HighIdx = 1;

  // Split both operands into their low and high words. The emission order
  // (LowA, HighA, LowB, HighB) is kept fixed so the generated IR is stable.
  llvm::Value *LowA = nullptr;
  llvm::Value *HighA = nullptr;
  llvm::Value *LowB = nullptr;
  llvm::Value *HighB = nullptr;
  if (HasTwoPairs) {
    // Even lanes hold the low words, odd lanes hold the high words.
    LowA = Builder.CreateShuffleVector(LHS, ArrayRef<int>{0, 2}, "LowA");
    HighA = Builder.CreateShuffleVector(LHS, ArrayRef<int>{1, 3}, "HighA");
    LowB = Builder.CreateShuffleVector(RHS, ArrayRef<int>{0, 2}, "LowB");
    HighB = Builder.CreateShuffleVector(RHS, ArrayRef<int>{1, 3}, "HighB");
  } else {
    LowA = Builder.CreateExtractElement(LHS, LowIdx, "LowA");
    HighA = Builder.CreateExtractElement(LHS, HighIdx, "HighA");
    LowB = Builder.CreateExtractElement(RHS, LowIdx, "LowB");
    HighB = Builder.CreateExtractElement(RHS, HighIdx, "HighB");
  }

  // Low-word sum plus an i1 (or vector of i1) carry flag.
  llvm::Value *CarryFlag;
  llvm::Value *LowResult = EmitOverflowIntrinsic(
      *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, CarryFlag);
  llvm::Value *CarryWord =
      Builder.CreateZExt(CarryFlag, HighA->getType(), "CarryZExt");

  // High-word sum with the carry from the low words added in.
  llvm::Value *HighPartial = Builder.CreateAdd(HighA, HighB, "HighSum");
  llvm::Value *HighResult =
      Builder.CreateAdd(HighPartial, CarryWord, "HighSumPlusCarry");

  if (!HasTwoPairs) {
    // Single pair: rebuild the 2-element vector as (low, high).
    llvm::Value *Pair = PoisonValue::get(LHS->getType());
    Pair = Builder.CreateInsertElement(Pair, LowResult, LowIdx,
                                       "hlsl.AddUint64.upto0");
    Pair = Builder.CreateInsertElement(Pair, HighResult, HighIdx,
                                       "hlsl.AddUint64");
    return Pair;
  }

  // Two pairs: re-interleave to (low0, high0, low1, high1).
  return Builder.CreateShuffleVector(LowResult, HighResult,
                                     ArrayRef<int>{0, 2, 1, 3},
                                     "hlsl.AddUint64");
}
|
||||||
case Builtin::BI__builtin_hlsl_resource_getpointer: {
|
case Builtin::BI__builtin_hlsl_resource_getpointer: {
|
||||||
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
||||||
Value *IndexOp = EmitScalarExpr(E->getArg(1));
|
Value *IndexOp = EmitScalarExpr(E->getArg(1));
|
||||||
|
@ -174,6 +174,27 @@ float3 acos(float3);
|
|||||||
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
|
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
|
||||||
float4 acos(float4);
|
float4 acos(float4);
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AddUint64 builtins
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
/// \fn T AddUint64(T a, T b)
|
||||||
|
/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
|
||||||
|
/// 32-bit integers.
|
||||||
|
/// \param a [in] The first unsigned 32-bit integer pair(s)
|
||||||
|
/// \param b [in] The second unsigned 32-bit integer pair(s)
|
||||||
|
///
|
||||||
|
/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
|
||||||
|
/// values and returns pairs (low, high) of unsigned 32-bit integer
|
||||||
|
/// values representing the result of unsigned 64-bit integer addition.
|
||||||
|
|
||||||
|
_HLSL_AVAILABILITY(shadermodel, 6.0)
|
||||||
|
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
|
||||||
|
uint32_t2 AddUint64(uint32_t2, uint32_t2);
|
||||||
|
_HLSL_AVAILABILITY(shadermodel, 6.0)
|
||||||
|
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
|
||||||
|
uint32_t4 AddUint64(uint32_t4, uint32_t4);
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// all builtins
|
// all builtins
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -2086,6 +2086,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
|
|||||||
checkAllFloatTypes);
|
checkAllFloatTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs CheckAllArgTypesAreCorrect over every argument of \p TheCall, passing
/// `unsigned int` as the type argument and a predicate that evaluates to true
/// for any type that is not an unsigned integer. For vector arguments the
/// predicate inspects the element type.
static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
  auto isNotUnsignedInteger = [](clang::QualType ArgTy) -> bool {
    // Look through vectors so that e.g. a vector of unsigned ints is judged
    // by its element type.
    if (const auto *VecTy = ArgTy->getAs<clang::VectorType>())
      return !VecTy->getElementType()->isUnsignedIntegerType();
    return !ArgTy->isUnsignedIntegerType();
  };
  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
                                    isNotUnsignedInteger);
}
|
||||||
|
|
||||||
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
|
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
|
||||||
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
|
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
|
||||||
clang::QualType BaseType =
|
clang::QualType BaseType =
|
||||||
@ -2277,6 +2289,52 @@ static bool CheckResourceHandle(
|
|||||||
// returning an ExprError
|
// returning an ExprError
|
||||||
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
|
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
|
||||||
switch (BuiltinID) {
|
switch (BuiltinID) {
|
||||||
|
case Builtin::BI__builtin_hlsl_adduint64: {
  // Argument validation for __builtin_hlsl_adduint64: exactly two arguments
  // of the same type, which must be a vector of 2 or 4 unsigned 32-bit
  // integers. On success the call's type is set to the argument type.
  if (SemaRef.checkArgCount(TheCall, 2) ||
      CheckVectorElementCallArgs(&SemaRef, TheCall) ||
      CheckUnsignedIntRepresentations(&SemaRef, TheCall))
    return true;

  // CheckVectorElementCallArgs(...) guarantees both args are the same type.
  assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
         "Both args must be of the same type");

  // Scalars are rejected: the operands must be vectors.
  QualType ArgTy = TheCall->getArg(0)->getType();
  const auto *VecTy = ArgTy->getAs<VectorType>();
  if (!VecTy) {
    SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
        << TheCall->getDirectCallee() << /*all*/ 1;
    return true;
  }

  // Each element must be exactly 32 bits wide.
  const uint64_t ElementBits =
      getASTContext().getTypeSizeInChars(VecTy->getElementType()).getQuantity() *
      8;
  if (ElementBits != 32) {
    SemaRef.Diag(TheCall->getBeginLoc(), diag::err_integer_incorrect_bit_count)
        << 32 << ElementBits;
    return true;
  }

  // Only 2- or 4-element vectors are accepted, i.e. one or two 64-bit values;
  // anything else is reported as a total bit count that is not a multiple
  // of 64.
  const int ElementCount = VecTy->getNumElements();
  const bool IsWholePairs = ElementCount == 2 || ElementCount == 4;
  if (!IsWholePairs) {
    SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
        << 1 /*a multiple of*/ << 64 << ElementCount * ElementBits;
    return true;
  }

  // The result has the same type as the operands.
  TheCall->setType(ArgTy);
  break;
}
|
||||||
case Builtin::BI__builtin_hlsl_resource_getpointer: {
|
case Builtin::BI__builtin_hlsl_resource_getpointer: {
|
||||||
if (SemaRef.checkArgCount(TheCall, 2) ||
|
if (SemaRef.checkArgCount(TheCall, 2) ||
|
||||||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
|
CheckResourceHandle(&SemaRef, TheCall, 0) ||
|
||||||
|
58
clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
Normal file
58
clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
||||||
|
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
|
||||||
|
// RUN: -emit-llvm -disable-llvm-passes -o - | \
|
||||||
|
// RUN: FileCheck %s --check-prefixes=CHECK
|
||||||
|
|
||||||
|
|
||||||
|
// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
|
||||||
|
// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||||
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||||
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
|
||||||
|
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
|
||||||
|
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
|
||||||
|
// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
|
||||||
|
// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
|
||||||
|
// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
|
||||||
|
// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
|
||||||
|
// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||||
|
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||||
|
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
|
||||||
|
// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
|
||||||
|
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
|
||||||
|
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
|
||||||
|
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
|
||||||
|
// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
|
||||||
|
//
|
||||||
|
uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
|
||||||
|
return AddUint64(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
|
||||||
|
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
|
||||||
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||||
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
|
||||||
|
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
|
||||||
|
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
|
||||||
|
// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
|
||||||
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
|
||||||
|
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
|
||||||
|
// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
|
||||||
|
// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
|
||||||
|
// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
|
||||||
|
// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
|
||||||
|
// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
|
||||||
|
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
|
||||||
|
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
|
||||||
|
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
|
||||||
|
// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
|
||||||
|
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
|
||||||
|
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
||||||
|
// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
|
||||||
|
//
|
||||||
|
uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
|
||||||
|
return AddUint64(a, b);
|
||||||
|
}
|
46
clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
Normal file
46
clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
|
||||||
|
|
||||||
|
uint2 test_too_few_arg() {
|
||||||
|
return __builtin_hlsl_adduint64();
|
||||||
|
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint4 test_too_many_arg(uint4 a) {
|
||||||
|
return __builtin_hlsl_adduint64(a, a, a);
|
||||||
|
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
|
||||||
|
return __builtin_hlsl_adduint64(a, b);
|
||||||
|
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
|
||||||
|
return __builtin_hlsl_adduint64(a, b);
|
||||||
|
// expected-error@-1 {{incorrect number of bits in vector operand (expected a multiple of 64 bits, have 96)}}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint2 test_scalar_arg_type(uint a) {
|
||||||
|
return __builtin_hlsl_adduint64(a, a);
|
||||||
|
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint2 test_uint64_args(uint16_t2 a) {
|
||||||
|
return __builtin_hlsl_adduint64(a, a);
|
||||||
|
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint2 test_signed_integer_args(int2 a, int2 b) {
|
||||||
|
return __builtin_hlsl_adduint64(a, b);
|
||||||
|
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct S {
|
||||||
|
uint2 a;
|
||||||
|
};
|
||||||
|
|
||||||
|
uint2 test_incorrect_arg_type(S a) {
|
||||||
|
return __builtin_hlsl_adduint64(a, a);
|
||||||
|
// expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
|
||||||
|
}
|
||||||
|
|
@ -56,6 +56,7 @@ def HandleTy : DXILOpParamType;
|
|||||||
def ResBindTy : DXILOpParamType;
|
def ResBindTy : DXILOpParamType;
|
||||||
def ResPropsTy : DXILOpParamType;
|
def ResPropsTy : DXILOpParamType;
|
||||||
def SplitDoubleTy : DXILOpParamType;
|
def SplitDoubleTy : DXILOpParamType;
|
||||||
|
def BinaryWithCarryTy : DXILOpParamType;
|
||||||
|
|
||||||
class DXILOpClass;
|
class DXILOpClass;
|
||||||
|
|
||||||
@ -744,6 +745,16 @@ def UMin : DXILOp<40, binary> {
|
|||||||
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UAddc: DXIL opcode 44 in the binaryWithCarryOrBorrow op class, selected for
// the llvm.uadd.with.overflow intrinsic. Only an i32 overload is listed, and
// the result is BinaryWithCarryTy, which the op builder materializes as the
// named struct %dx.types.i32c = { i32, i1 }.
def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
|
||||||
|
let Doc = "unsigned add of 32-bit operand with the carry";
|
||||||
|
let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
|
||||||
|
let arguments = [OverloadTy, OverloadTy];
|
||||||
|
let result = BinaryWithCarryTy;
|
||||||
|
let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
|
||||||
|
let stages = [Stages<DXIL1_0, [all_stages]>];
|
||||||
|
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
|
||||||
|
}
|
||||||
|
|
||||||
def FMad : DXILOp<46, tertiary> {
|
def FMad : DXILOp<46, tertiary> {
|
||||||
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
|
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
|
||||||
"* a + b.";
|
"* a + b.";
|
||||||
|
@ -253,6 +253,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
|
|||||||
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
|
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the named struct type `dx.types.i32c`, laid out as { i32, i1 }.
/// The type is looked up by name in \p Context first and only created when no
/// struct with that name exists yet.
static StructType *getBinaryWithCarryType(LLVMContext &Context) {
  const char *const TypeName = "dx.types.i32c";

  StructType *CarryTy = StructType::getTypeByName(Context, TypeName);
  if (!CarryTy) {
    Type *ValueTy = Type::getInt32Ty(Context);
    Type *FlagTy = Type::getInt1Ty(Context);
    CarryTy = StructType::create({ValueTy, FlagTy}, TypeName);
  }
  return CarryTy;
}
|
||||||
|
|
||||||
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
|
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
|
||||||
Type *OverloadTy) {
|
Type *OverloadTy) {
|
||||||
switch (Kind) {
|
switch (Kind) {
|
||||||
@ -308,6 +316,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
|
|||||||
return getResPropsType(Ctx);
|
return getResPropsType(Ctx);
|
||||||
case OpParamType::SplitDoubleTy:
|
case OpParamType::SplitDoubleTy:
|
||||||
return getSplitDoubleType(Ctx);
|
return getSplitDoubleType(Ctx);
|
||||||
|
case OpParamType::BinaryWithCarryTy:
|
||||||
|
return getBinaryWithCarryType(Ctx);
|
||||||
}
|
}
|
||||||
llvm_unreachable("Invalid parameter kind");
|
llvm_unreachable("Invalid parameter kind");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
75
llvm/test/CodeGen/DirectX/UAddc.ll
Normal file
75
llvm/test/CodeGen/DirectX/UAddc.ll
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||||
|
|
||||||
|
; This test exercises the lowering of the intrinsic @llvm.uadd.with.overflow.i32 to the UAddc DXIL op
|
||||||
|
|
||||||
|
; CHECK-DAG: [[DX_TYPES_I32C:%dx\.types\.i32c]] = type { i32, i1 }
|
||||||
|
|
||||||
|
; NOTE: The uint2 overload of AddUint64 HLSL uses @llvm.uadd.with.overflow.i32, resulting in one UAddc op
|
||||||
|
define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
|
||||||
|
; CHECK-LABEL: define noundef i32 @test_UAddc(
|
||||||
|
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
|
||||||
|
; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 1
|
||||||
|
; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0
|
||||||
|
; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32
|
||||||
|
; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[SUM]], [[CARRY_ZEXT]]
|
||||||
|
; CHECK-NEXT: ret i32 [[RESULT]]
|
||||||
|
;
|
||||||
|
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
|
||||||
|
%carry = extractvalue { i32, i1 } %uaddc, 1
|
||||||
|
%sum = extractvalue { i32, i1 } %uaddc, 0
|
||||||
|
%carry_zext = zext i1 %carry to i32
|
||||||
|
%result = add i32 %sum, %carry_zext
|
||||||
|
ret i32 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
; NOTE: The uint4 overload of AddUint64 HLSL uses @llvm.uadd.with.overflow.v2i32, resulting in two UAddc ops after scalarization
|
||||||
|
define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> noundef %b) {
|
||||||
|
; CHECK-LABEL: define noundef <2 x i32> @test_UAddc_vec2(
|
||||||
|
; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0
|
||||||
|
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0
|
||||||
|
; CHECK-NEXT: [[UADDC_I0:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
|
||||||
|
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1
|
||||||
|
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1
|
||||||
|
; CHECK-NEXT: [[UADDC_I1:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
|
||||||
|
; CHECK-NEXT: [[CARRY_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I0]], 1
|
||||||
|
; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I1]], 1
|
||||||
|
; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM0]], i64 0
|
||||||
|
; CHECK-NEXT: [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 [[CARRY_ELEM1]], i64 1
|
||||||
|
; CHECK-NEXT: [[CARRY_I0:%.*]] = extractelement <2 x i1> [[CARRY]], i64 0
|
||||||
|
; CHECK-NEXT: [[CARRY_I1:%.*]] = extractelement <2 x i1> [[CARRY]], i64 1
|
||||||
|
; CHECK-NEXT: [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I0]], 0
|
||||||
|
; CHECK-NEXT: [[SUM_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I1]], 0
|
||||||
|
; CHECK-NEXT: [[CARRY_ZEXT_I0:%.*]] = zext i1 [[CARRY_I0]] to i32
|
||||||
|
; CHECK-NEXT: [[CARRY_ZEXT_I1:%.*]] = zext i1 [[CARRY_I1]] to i32
|
||||||
|
; CHECK-NEXT: [[RESULT_I0:%.*]] = add i32 [[SUM_ELEM0]], [[CARRY_ZEXT_I0]]
|
||||||
|
; CHECK-NEXT: [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM1]], [[CARRY_ZEXT_I1]]
|
||||||
|
; CHECK-NEXT: [[RESULT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RESULT_I0]], i64 0
|
||||||
|
; CHECK-NEXT: [[RESULT:%.*]] = insertelement <2 x i32> [[RESULT_UPTO0]], i32 [[RESULT_I1]], i64 1
|
||||||
|
; CHECK-NEXT: ret <2 x i32> [[RESULT]]
|
||||||
|
;
|
||||||
|
%uaddc = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
|
||||||
|
%carry = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 1
|
||||||
|
%sum = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 0
|
||||||
|
%carry_zext = zext <2 x i1> %carry to <2 x i32>
|
||||||
|
%result = add <2 x i32> %sum, %carry_zext
|
||||||
|
ret <2 x i32> %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
|
||||||
|
; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
|
||||||
|
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
|
||||||
|
; CHECK-NEXT: [[UNUSED:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC]], i32 [[A]], 0
|
||||||
|
; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0
|
||||||
|
; CHECK-NEXT: ret i32 [[RESULT]]
|
||||||
|
;
|
||||||
|
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
|
||||||
|
insertvalue { i32, i1 } %uaddc, i32 %a, 0
|
||||||
|
%result = extractvalue { i32, i1 } %uaddc, 0
|
||||||
|
ret i32 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
|
||||||
|
|
30
llvm/test/CodeGen/DirectX/UAddc_errors.ll
Normal file
30
llvm/test/CodeGen/DirectX/UAddc_errors.ll
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
; We use llc for this test so that we don't abort after the first error.
|
||||||
|
; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
|
||||||
|
|
||||||
|
target triple = "dxil-pc-shadermodel6.3-library"
|
||||||
|
|
||||||
|
; DXIL operation UAddc only supports i32. Other integer types are unsupported.
|
||||||
|
; CHECK: error:
|
||||||
|
; CHECK-SAME: in function uaddc_i16
|
||||||
|
; CHECK-SAME: Cannot create UAddc operation: Invalid overload type
|
||||||
|
|
||||||
|
define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) "hlsl.export" {
|
||||||
|
%uaddc = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
|
||||||
|
%carry = extractvalue { i16, i1 } %uaddc, 1
|
||||||
|
%sum = extractvalue { i16, i1 } %uaddc, 0
|
||||||
|
%carry_zext = zext i1 %carry to i16
|
||||||
|
%result = add i16 %sum, %carry_zext
|
||||||
|
ret i16 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: error:
|
||||||
|
; CHECK-SAME: in function uaddc_return
|
||||||
|
; CHECK-SAME: DXIL ops that return structs may only be used by insert- and extractvalue
|
||||||
|
|
||||||
|
define noundef { i32, i1 } @uaddc_return(i32 noundef %a, i32 noundef %b) "hlsl.export" {
|
||||||
|
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
|
||||||
|
ret { i32, i1 } %uaddc
|
||||||
|
}
|
||||||
|
|
||||||
|
declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16)
|
||||||
|
|
88
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll
Normal file
88
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
|
||||||
|
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
|
||||||
|
|
||||||
|
; Code here is an excerpt of clang/test/CodeGenHLSL/builtins/AddUint64.hlsl compiled for spirv using the following command
|
||||||
|
; clang -cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute clang/test/CodeGenHLSL/builtins/AddUint64.hlsl -emit-llvm -disable-llvm-passes -o llvm/test/CodeGen/SPIRV/hlsl-intrinsics/uadd_with_overflow.ll
|
||||||
|
|
||||||
|
; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0
|
||||||
|
; CHECK-DAG: %[[#vec2_int_32:]] = OpTypeVector %[[#int_32]] 2
|
||||||
|
; CHECK-DAG: %[[#bool:]] = OpTypeBool
|
||||||
|
; CHECK-DAG: %[[#const_i32_1:]] = OpConstant %[[#int_32]] 1
|
||||||
|
; CHECK-DAG: %[[#struct_i32_i32:]] = OpTypeStruct %[[#int_32]] %[[#int_32]]
|
||||||
|
; CHECK-DAG: %[[#func_v2i32_v2i32_v2i32:]] = OpTypeFunction %[[#vec2_int_32]] %[[#vec2_int_32]] %[[#vec2_int_32]]
|
||||||
|
; CHECK-DAG: %[[#const_i32_0:]] = OpConstant %[[#int_32]] 0
|
||||||
|
; CHECK-DAG: %[[#undef_v2i32:]] = OpUndef %[[#vec2_int_32]]
|
||||||
|
; CHECK-DAG: %[[#vec4_int_32:]] = OpTypeVector %[[#int_32]] 4
|
||||||
|
; CHECK-DAG: %[[#vec2_bool:]] = OpTypeVector %[[#bool]] 2
|
||||||
|
; CHECK-DAG: %[[#const_v2i32_0_0:]] = OpConstantComposite %[[#vec2_int_32]] %[[#const_i32_0]] %[[#const_i32_0]]
|
||||||
|
; CHECK-DAG: %[[#const_v2i32_1_1:]] = OpConstantComposite %[[#vec2_int_32]] %[[#const_i32_1]] %[[#const_i32_1]]
|
||||||
|
; CHECK-DAG: %[[#struct_v2i32_v2i32:]] = OpTypeStruct %[[#vec2_int_32]] %[[#vec2_int_32]]
|
||||||
|
; CHECK-DAG: %[[#func_v4i32_v4i32_v4i32:]] = OpTypeFunction %[[#vec4_int_32]] %[[#vec4_int_32]] %[[#vec4_int_32]]
|
||||||
|
; CHECK-DAG: %[[#undef_v4i32:]] = OpUndef %[[#vec4_int_32]]
|
||||||
|
|
||||||
|
|
||||||
|
define spir_func <2 x i32> @test_AddUint64_uint2(<2 x i32> %a, <2 x i32> %b) {
|
||||||
|
entry:
|
||||||
|
; CHECK: %[[#a:]] = OpFunctionParameter %[[#vec2_int_32]]
|
||||||
|
; CHECK: %[[#b:]] = OpFunctionParameter %[[#vec2_int_32]]
|
||||||
|
; CHECK: %[[#a_low:]] = OpCompositeExtract %[[#int_32]] %[[#a]] 0
|
||||||
|
; CHECK: %[[#a_high:]] = OpCompositeExtract %[[#int_32]] %[[#a]] 1
|
||||||
|
; CHECK: %[[#b_low:]] = OpCompositeExtract %[[#int_32]] %[[#b]] 0
|
||||||
|
; CHECK: %[[#b_high:]] = OpCompositeExtract %[[#int_32]] %[[#b]] 1
|
||||||
|
; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_i32_i32]] %[[#a_low]] %[[#b_low]]
|
||||||
|
; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 0
|
||||||
|
; CHECK: %[[#carry:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 1
|
||||||
|
; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#bool]] %[[#carry]] %[[#const_i32_0]]
|
||||||
|
; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#int_32]] %[[#carry_ne0]] %[[#const_i32_1]] %[[#const_i32_0]]
|
||||||
|
; CHECK: %[[#highsum:]] = OpIAdd %[[#int_32]] %[[#a_high]] %[[#b_high]]
|
||||||
|
; CHECK: %[[#highsumpluscarry:]] = OpIAdd %[[#int_32]] %[[#highsum]] %[[#select_1_or_0]]
|
||||||
|
; CHECK: %[[#adduint64_upto0:]] = OpCompositeInsert %[[#vec2_int_32]] %[[#lowsum]] %[[#undef_v2i32]] 0
|
||||||
|
; CHECK: %[[#adduint64:]] = OpCompositeInsert %[[#vec2_int_32]] %[[#highsumpluscarry]] %[[#adduint64_upto0]] 1
|
||||||
|
; CHECK: OpReturnValue %[[#adduint64]]
|
||||||
|
;
|
||||||
|
%LowA = extractelement <2 x i32> %a, i64 0
|
||||||
|
%HighA = extractelement <2 x i32> %a, i64 1
|
||||||
|
%LowB = extractelement <2 x i32> %b, i64 0
|
||||||
|
%HighB = extractelement <2 x i32> %b, i64 1
|
||||||
|
%3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %LowA, i32 %LowB)
|
||||||
|
%4 = extractvalue { i32, i1 } %3, 1
|
||||||
|
%5 = extractvalue { i32, i1 } %3, 0
|
||||||
|
%CarryZExt = zext i1 %4 to i32
|
||||||
|
%HighSum = add i32 %HighA, %HighB
|
||||||
|
%HighSumPlusCarry = add i32 %HighSum, %CarryZExt
|
||||||
|
%hlsl.AddUint64.upto0 = insertelement <2 x i32> poison, i32 %5, i64 0
|
||||||
|
%hlsl.AddUint64 = insertelement <2 x i32> %hlsl.AddUint64.upto0, i32 %HighSumPlusCarry, i64 1
|
||||||
|
ret <2 x i32> %hlsl.AddUint64
|
||||||
|
}
|
||||||
|
|
||||||
|
define spir_func <4 x i32> @test_AddUint64_uint4(<4 x i32> %a, <4 x i32> %b) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK: %[[#a:]] = OpFunctionParameter %[[#vec4_int_32]]
|
||||||
|
; CHECK: %[[#b:]] = OpFunctionParameter %[[#vec4_int_32]]
|
||||||
|
; CHECK: %[[#a_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 0 2
|
||||||
|
; CHECK: %[[#a_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 1 3
|
||||||
|
; CHECK: %[[#b_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 0 2
|
||||||
|
; CHECK: %[[#b_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 1 3
|
||||||
|
; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_v2i32_v2i32]] %[[#a_low]] %[[#vec2_int_32]]
|
||||||
|
; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 0
|
||||||
|
; CHECK: %[[#carry:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 1
|
||||||
|
; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#vec2_bool]] %[[#carry]] %[[#const_v2i32_0_0]]
|
||||||
|
; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#vec2_int_32]] %[[#carry_ne0]] %[[#const_v2i32_1_1]] %[[#const_v2i32_0_0]]
|
||||||
|
; CHECK: %[[#highsum:]] = OpIAdd %[[#vec2_int_32]] %[[#a_high]] %[[#b_high]]
|
||||||
|
; CHECK: %[[#highsumpluscarry:]] = OpIAdd %[[#vec2_int_32]] %[[#highsum]] %[[#select_1_or_0]]
|
||||||
|
; CHECK: %[[#adduint64:]] = OpVectorShuffle %[[#vec4_int_32]] %[[#lowsum]] %[[#highsumpluscarry]] 0 2 1 3
|
||||||
|
; CHECK: OpReturnValue %[[#adduint64]]
|
||||||
|
;
|
||||||
|
%LowA = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
|
||||||
|
%HighA = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
|
||||||
|
%LowB = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
|
||||||
|
%HighB = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
|
||||||
|
%3 = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %LowA, <2 x i32> %LowB)
|
||||||
|
%4 = extractvalue { <2 x i32>, <2 x i1> } %3, 1
|
||||||
|
%5 = extractvalue { <2 x i32>, <2 x i1> } %3, 0
|
||||||
|
%CarryZExt = zext <2 x i1> %4 to <2 x i32>
|
||||||
|
%HighSum = add <2 x i32> %HighA, %HighB
|
||||||
|
%HighSumPlusCarry = add <2 x i32> %HighSum, %CarryZExt
|
||||||
|
%hlsl.AddUint64 = shufflevector <2 x i32> %5, <2 x i32> %HighSumPlusCarry, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
||||||
|
ret <4 x i32> %hlsl.AddUint64
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user