[NVVM] Add various intrinsic attrs, cleanup and consolidate td (#153436)
- llvm.nvvm.reflect - Use a PureIntrinsic for (adding speculatable), this will be replaced by a constant prior to lowering so speculation is fine. - llvm.nvvm.tex.* - Add [IntrNoCallback, IntrNoFree, IntrWillReturn] - llvm.nvvm.suld.* - Add [IntrNoCallback, IntrNoFree] and [IntrWillReturn] when not using "clamp" mode - llvm.nvvm.sust.* - Add [IntrNoCallback, IntrNoFree, IntrWriteMem] and [IntrWillReturn] when not using "clamp" mode - llvm.nvvm.[suq|txq|istypep].* - Use DefaultAttrsIntrinsic - llvm.nvvm.read.ptx.sreg.* - Add [IntrNoFree, IntrWillReturn] to non-constant reads as well.
This commit is contained in:
parent
916218ccbd
commit
d12f58ff11
@ -793,31 +793,42 @@ class NVVMBuiltin :
|
||||
"NVVMBuiltin must be a NVVM intrinsic starting with 'int_nvvm_'";
|
||||
}
|
||||
|
||||
class PureIntrinsic<list<LLVMType> ret_types,
|
||||
list<LLVMType> param_types = [],
|
||||
list<IntrinsicProperty> intr_properties = [],
|
||||
string name = ""> :
|
||||
DefaultAttrsIntrinsic<ret_types, param_types,
|
||||
intr_properties # [IntrNoMem, IntrSpeculatable], name> {}
|
||||
|
||||
let TargetPrefix = "nvvm" in {
|
||||
|
||||
//
|
||||
// PRMT - permute
|
||||
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
//
|
||||
def int_nvvm_prmt : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
foreach mode = ["f4e", "b4e"] in
|
||||
def int_nvvm_prmt_ # mode :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
// Note: these variants also have 2 source operands but only one will ever
|
||||
// be used so we eliminate the other operand in the IR (0 is used as the
|
||||
// placeholder in the backend).
|
||||
foreach mode = ["rc8", "ecl", "ecr", "rc16"] in
|
||||
def int_nvvm_prmt_ # mode :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
|
||||
}
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
//
|
||||
// Nanosleep
|
||||
//
|
||||
def int_nvvm_nanosleep : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
|
||||
[IntrConvergent, IntrNoMem, IntrHasSideEffects]>;
|
||||
|
||||
//
|
||||
// Performance Monitor Events (pm events) intrinsics
|
||||
//
|
||||
def int_nvvm_pm_event_mask : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[], [llvm_i16_ty],
|
||||
[IntrConvergent, IntrNoMem, IntrHasSideEffects,
|
||||
@ -904,32 +915,28 @@ let TargetPrefix = "nvvm" in {
|
||||
}
|
||||
|
||||
//
|
||||
// Sad
|
||||
// Sad - Sum of Absolute Differences
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
foreach sign = ["", "u"] in {
|
||||
def int_nvvm_sad_ # sign # s : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>;
|
||||
|
||||
def int_nvvm_sad_ # sign # i : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
def int_nvvm_sad_ # sign # ll : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
|
||||
}
|
||||
PureIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
|
||||
}
|
||||
|
||||
//
|
||||
// Floor Ceil
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
foreach op = ["floor", "ceil"] in {
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_ # op # ftz # _f : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
def int_nvvm_ # op # _d : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
}
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
}
|
||||
|
||||
//
|
||||
@ -937,52 +944,45 @@ let TargetPrefix = "nvvm" in {
|
||||
//
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_fabs # ftz :
|
||||
DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
PureIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
|
||||
|
||||
//
|
||||
// Abs, Neg bf16, bf16x2
|
||||
// Neg bf16, bf16x2
|
||||
//
|
||||
def int_nvvm_neg_bf16 : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>;
|
||||
PureIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty]>;
|
||||
def int_nvvm_neg_bf16x2 : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty], [IntrNoMem]>;
|
||||
PureIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty]>;
|
||||
|
||||
//
|
||||
// Round
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_round # ftz # _f : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_round_d : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
}
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
|
||||
//
|
||||
// Trunc
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_trunc # ftz # _f : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_trunc_d : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
}
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
|
||||
//
|
||||
// Saturate
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_saturate # ftz # _f : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_saturate_d : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
}
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
|
||||
|
||||
//
|
||||
// Exp2 Log2
|
||||
@ -1018,23 +1018,22 @@ let TargetPrefix = "nvvm" in {
|
||||
//
|
||||
// Fma
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
foreach variant = ["", "_sat", "_relu"] in {
|
||||
foreach ftz = ["", "_ftz"] in {
|
||||
def int_nvvm_fma_rn # ftz # variant # _f16 :
|
||||
DefaultAttrsIntrinsic<[llvm_half_ty],
|
||||
PureIntrinsic<[llvm_half_ty],
|
||||
[llvm_half_ty, llvm_half_ty, llvm_half_ty]>;
|
||||
|
||||
def int_nvvm_fma_rn # ftz # variant # _f16x2 :
|
||||
DefaultAttrsIntrinsic<[llvm_v2f16_ty],
|
||||
PureIntrinsic<[llvm_v2f16_ty],
|
||||
[llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty]>;
|
||||
|
||||
def int_nvvm_fma_rn # ftz # variant # _bf16 : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_bfloat_ty],
|
||||
PureIntrinsic<[llvm_bfloat_ty],
|
||||
[llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty]>;
|
||||
|
||||
def int_nvvm_fma_rn # ftz # variant # _bf16x2 : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2bf16_ty],
|
||||
PureIntrinsic<[llvm_v2bf16_ty],
|
||||
[llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty]>;
|
||||
} // ftz
|
||||
} // variant
|
||||
@ -1042,14 +1041,13 @@ let TargetPrefix = "nvvm" in {
|
||||
foreach rnd = ["rn", "rz", "rm", "rp"] in {
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_fma_ # rnd # ftz # _f : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty],
|
||||
PureIntrinsic<[llvm_float_ty],
|
||||
[llvm_float_ty, llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_fma_ # rnd # _d : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty],
|
||||
PureIntrinsic<[llvm_double_ty],
|
||||
[llvm_double_ty, llvm_double_ty, llvm_double_ty]>;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Rcp
|
||||
@ -1123,13 +1121,12 @@ let TargetPrefix = "nvvm" in {
|
||||
foreach a_type = ["s", "u"] in {
|
||||
foreach b_type = ["s", "u"] in {
|
||||
def int_nvvm_idp4a_ # a_type # _ # b_type :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
PureIntrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
def int_nvvm_idp2a_ # a_type # _ # b_type :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty],
|
||||
PureIntrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>;
|
||||
[ImmArg<ArgIndex<2>>]>;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1138,18 +1135,16 @@ let TargetPrefix = "nvvm" in {
|
||||
//
|
||||
foreach direction = ["l", "r"] in
|
||||
def int_nvvm_fsh # direction # _clamp :
|
||||
DefaultAttrsIntrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
PureIntrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
|
||||
|
||||
//
|
||||
// FLO - Find Leading One
|
||||
//
|
||||
foreach sign = ["s", "u"] in
|
||||
def int_nvvm_flo_ # sign :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty],
|
||||
[llvm_anyint_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_anyint_ty, llvm_i1_ty],
|
||||
[ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
//
|
||||
// szext
|
||||
@ -1157,128 +1152,133 @@ let TargetPrefix = "nvvm" in {
|
||||
foreach ext = ["sext", "zext"] in
|
||||
foreach mode = ["wrap", "clamp"] in
|
||||
def int_nvvm_ # ext # _ # mode :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
//
|
||||
// BMSK - bit mask
|
||||
//
|
||||
foreach mode = ["wrap", "clamp"] in
|
||||
def int_nvvm_bmsk_ # mode :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
//
|
||||
// FNS - Find the n-th set bit
|
||||
//
|
||||
def int_nvvm_fns : NVVMBuiltin,
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
//
|
||||
// Convert
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
// TODO: All these intrinsics are defined as PureIntrinsic, this attaches the
|
||||
// IntrSpeculatable property to them. Consider if some of these should
|
||||
// have this attribute removed as they may be too expensive.
|
||||
//
|
||||
def int_nvvm_lohi_i2d : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty]>;
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty]>;
|
||||
|
||||
def int_nvvm_d2i_lo : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
|
||||
def int_nvvm_d2i_hi : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
|
||||
|
||||
foreach rnd = ["rn", "rz", "rm", "rp"] in {
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_d2f_ # rnd # ftz : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_double_ty]>;
|
||||
|
||||
foreach sign = ["", "u"] in {
|
||||
|
||||
def int_nvvm_d2 # sign # i_ # rnd : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
|
||||
|
||||
def int_nvvm_ # sign # i2d_ # rnd : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty]>;
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_i32_ty]>;
|
||||
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_f2 # sign # i_ # rnd # ftz : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_ # sign # i2f_ # rnd : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_i32_ty]>;
|
||||
|
||||
foreach ftz = ["", "_ftz"] in
|
||||
def int_nvvm_f2 # sign # ll_ # rnd # ftz : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i64_ty], [llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_d2 # sign # ll_ # rnd : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty]>;
|
||||
PureIntrinsic<[llvm_i64_ty], [llvm_double_ty]>;
|
||||
|
||||
def int_nvvm_ # sign # ll2f_ # rnd : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty]>;
|
||||
PureIntrinsic<[llvm_float_ty], [llvm_i64_ty]>;
|
||||
|
||||
def int_nvvm_ # sign # ll2d_ # rnd : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty]>;
|
||||
PureIntrinsic<[llvm_double_ty], [llvm_i64_ty]>;
|
||||
|
||||
} // sign
|
||||
} // rnd
|
||||
|
||||
foreach ftz = ["", "_ftz"] in {
|
||||
def int_nvvm_f2h_rn # ftz : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_bf2h_rn # ftz : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty]>;
|
||||
}
|
||||
|
||||
foreach rnd = ["rn", "rz"] in {
|
||||
foreach relu = ["", "_relu"] in {
|
||||
def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
|
||||
}
|
||||
}
|
||||
|
||||
foreach satfinite = ["", "_satfinite"] in {
|
||||
def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
|
||||
|
||||
foreach rnd = ["rn", "rz"] in
|
||||
foreach relu = ["", "_relu"] in
|
||||
def int_nvvm_f2tf32_ # rnd # relu # satfinite : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
|
||||
}
|
||||
|
||||
foreach type = ["e4m3x2", "e5m2x2"] in {
|
||||
foreach relu = ["", "_relu"] in {
|
||||
def int_nvvm_ff_to_ # type # _rn # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_f16x2_to_ # type # _rn # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>;
|
||||
|
||||
def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
|
||||
PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
|
||||
}
|
||||
}
|
||||
|
||||
// FP4 conversions.
|
||||
foreach relu = ["", "_relu"] in {
|
||||
def int_nvvm_ff_to_e2m1x2_rn # relu # _satfinite : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
|
||||
PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
|
||||
}
|
||||
|
||||
// FP6 conversions.
|
||||
foreach type = ["e2m3x2", "e3m2x2"] in {
|
||||
foreach relu = ["", "_relu"] in {
|
||||
def int_nvvm_ff_to_ # type # _rn # relu # _satfinite : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
|
||||
PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1287,24 +1287,20 @@ let TargetPrefix = "nvvm" in {
|
||||
foreach satmode = ["", "_satfinite"] in {
|
||||
defvar suffix = rmode # satmode;
|
||||
def int_nvvm_ff_to_ue8m0x2 # suffix : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
|
||||
|
||||
def int_nvvm_bf16x2_to_ue8m0x2 # suffix : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty]>;
|
||||
PureIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty]>;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
def int_nvvm_ue8m0x2_to_bf16x2 : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_i16_ty]>;
|
||||
|
||||
} // IntrProperties = [IntrNoMem, IntrSpeculatable]
|
||||
|
||||
// FNS
|
||||
def int_nvvm_fns : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
PureIntrinsic<[llvm_v2bf16_ty], [llvm_i16_ty]>;
|
||||
|
||||
//
|
||||
// Atomic operations
|
||||
//
|
||||
class SCOPED_ATOMIC2_impl<LLVMType elty>
|
||||
: Intrinsic<[elty],
|
||||
[llvm_anyptr_ty, LLVMMatchType<0>],
|
||||
@ -1337,7 +1333,9 @@ let TargetPrefix = "nvvm" in {
|
||||
defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
|
||||
defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
|
||||
|
||||
//
|
||||
// Bar.Sync
|
||||
//
|
||||
def int_nvvm_barrier0_popc : ClangBuiltin<"__nvvm_bar0_popc">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier0_and : ClangBuiltin<"__nvvm_bar0_and">,
|
||||
@ -1361,29 +1359,31 @@ let TargetPrefix = "nvvm" in {
|
||||
}
|
||||
}
|
||||
|
||||
let IntrProperties = [IntrConvergent, IntrNoCallback] in {
|
||||
// barrier.cluster.[wait, arrive, arrive.relaxed]
|
||||
def int_nvvm_barrier_cluster_arrive :
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier_cluster_arrive_relaxed :
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier_cluster_wait :
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier_cluster_arrive : Intrinsic<[]>;
|
||||
def int_nvvm_barrier_cluster_arrive_relaxed : Intrinsic<[]>;
|
||||
def int_nvvm_barrier_cluster_wait : Intrinsic<[]>;
|
||||
|
||||
// 'aligned' versions of the above barrier.cluster.* intrinsics
|
||||
def int_nvvm_barrier_cluster_arrive_aligned :
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier_cluster_wait_aligned :
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier_cluster_arrive_aligned : Intrinsic<[]>;
|
||||
def int_nvvm_barrier_cluster_arrive_relaxed_aligned : Intrinsic<[]>;
|
||||
def int_nvvm_barrier_cluster_wait_aligned : Intrinsic<[]>;
|
||||
}
|
||||
|
||||
//
|
||||
// Membar
|
||||
def int_nvvm_membar_cta : NVVMBuiltin, Intrinsic<[], [], [IntrNoCallback]>;
|
||||
def int_nvvm_membar_gl : NVVMBuiltin, Intrinsic<[], [], [IntrNoCallback]>;
|
||||
def int_nvvm_membar_sys : NVVMBuiltin, Intrinsic<[], [], [IntrNoCallback]>;
|
||||
def int_nvvm_fence_sc_cluster : Intrinsic<[], [], [IntrNoCallback]>;
|
||||
//
|
||||
let IntrProperties = [IntrNoCallback] in {
|
||||
def int_nvvm_membar_cta : NVVMBuiltin, Intrinsic<[]>;
|
||||
def int_nvvm_membar_gl : NVVMBuiltin, Intrinsic<[]>;
|
||||
def int_nvvm_membar_sys : NVVMBuiltin, Intrinsic<[]>;
|
||||
def int_nvvm_fence_sc_cluster : Intrinsic<[]>;
|
||||
}
|
||||
|
||||
//
|
||||
// Proxy fence (uni-directional)
|
||||
//
|
||||
foreach scope = ["cta", "cluster", "gpu", "sys"] in {
|
||||
|
||||
def int_nvvm_fence_proxy_tensormap_generic_release_ # scope :
|
||||
@ -1398,7 +1398,9 @@ foreach scope = ["cta", "cluster", "gpu", "sys"] in {
|
||||
"llvm.nvvm.fence.proxy.tensormap_generic.acquire." # scope>;
|
||||
}
|
||||
|
||||
//
|
||||
// Async Copy
|
||||
//
|
||||
let IntrProperties = [IntrConvergent, IntrNoCallback] in {
|
||||
def int_nvvm_cp_async_mbarrier_arrive : NVVMBuiltin,
|
||||
Intrinsic<[], [llvm_ptr_ty]>;
|
||||
@ -1411,12 +1413,11 @@ let IntrProperties = [IntrConvergent, IntrNoCallback] in {
|
||||
}
|
||||
|
||||
multiclass CP_ASYNC_SHARED_GLOBAL {
|
||||
def NAME : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty],
|
||||
[IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
|
||||
def _s : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty, llvm_i32_ty],
|
||||
[IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
|
||||
let IntrProperties = [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>,
|
||||
NoAlias<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>] in {
|
||||
def NAME : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty]>;
|
||||
def _s : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty, llvm_i32_ty]>;
|
||||
}
|
||||
}
|
||||
|
||||
defm int_nvvm_cp_async_ca_shared_global_4 : CP_ASYNC_SHARED_GLOBAL;
|
||||
@ -1424,17 +1425,15 @@ defm int_nvvm_cp_async_ca_shared_global_8 : CP_ASYNC_SHARED_GLOBAL;
|
||||
defm int_nvvm_cp_async_ca_shared_global_16 : CP_ASYNC_SHARED_GLOBAL;
|
||||
defm int_nvvm_cp_async_cg_shared_global_16 : CP_ASYNC_SHARED_GLOBAL;
|
||||
|
||||
def int_nvvm_cp_async_commit_group : NVVMBuiltin, Intrinsic<[], [], []>;
|
||||
def int_nvvm_cp_async_commit_group : NVVMBuiltin, Intrinsic<[]>;
|
||||
|
||||
def int_nvvm_cp_async_wait_group : NVVMBuiltin,
|
||||
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
|
||||
|
||||
def int_nvvm_cp_async_wait_all : NVVMBuiltin,
|
||||
Intrinsic<[], [], []>;
|
||||
def int_nvvm_cp_async_wait_all : NVVMBuiltin, Intrinsic<[]>;
|
||||
|
||||
// cp.async.bulk variants of the commit/wait group
|
||||
def int_nvvm_cp_async_bulk_commit_group :
|
||||
Intrinsic<[], [], []>;
|
||||
def int_nvvm_cp_async_bulk_commit_group : Intrinsic<[]>;
|
||||
|
||||
def int_nvvm_cp_async_bulk_wait_group :
|
||||
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
|
||||
@ -1457,29 +1456,30 @@ def int_nvvm_mbarrier_inval_shared : NVVMBuiltin,
|
||||
[IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
|
||||
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
let IntrProperties = [IntrConvergent, IntrNoCallback] in {
|
||||
def int_nvvm_mbarrier_arrive : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty]>;
|
||||
def int_nvvm_mbarrier_arrive_shared : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty]>;
|
||||
def int_nvvm_mbarrier_arrive_noComplete : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty]>;
|
||||
def int_nvvm_mbarrier_arrive_noComplete_shared : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty,
|
||||
llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty, llvm_i32_ty]>;
|
||||
|
||||
def int_nvvm_mbarrier_arrive_drop : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty]>;
|
||||
def int_nvvm_mbarrier_arrive_drop_shared : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty]>;
|
||||
def int_nvvm_mbarrier_arrive_drop_noComplete : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty]>;
|
||||
def int_nvvm_mbarrier_arrive_drop_noComplete_shared : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty, llvm_i32_ty]>;
|
||||
|
||||
def int_nvvm_mbarrier_test_wait : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i64_ty]>;
|
||||
def int_nvvm_mbarrier_test_wait_shared : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i1_ty], [llvm_shared_ptr_ty, llvm_i64_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
Intrinsic<[llvm_i1_ty], [llvm_shared_ptr_ty, llvm_i64_ty]>;
|
||||
}
|
||||
|
||||
def int_nvvm_mbarrier_pending_count : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent, IntrNoCallback]>;
|
||||
@ -1504,9 +1504,8 @@ let IntrProperties = [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillRetur
|
||||
// space when lowered during ISel.
|
||||
//
|
||||
def int_nvvm_internal_addrspace_wrap :
|
||||
DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
|
||||
[IntrNoMem, IntrSpeculatable, NoUndef<ArgIndex<0>>,
|
||||
NoUndef<RetIndex>]>;
|
||||
PureIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
|
||||
[NoUndef<ArgIndex<0>>, NoUndef<RetIndex>]>;
|
||||
|
||||
// Move intrinsics, used in nvvm internally
|
||||
|
||||
@ -1520,36 +1519,26 @@ let IntrProperties = [IntrNoMem] in {
|
||||
}
|
||||
|
||||
// For getting the handle from a texture or surface variable
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
def int_nvvm_texsurf_handle
|
||||
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty]>;
|
||||
: PureIntrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty]>;
|
||||
def int_nvvm_texsurf_handle_internal
|
||||
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
|
||||
}
|
||||
: PureIntrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
|
||||
|
||||
/// Error / Warn
|
||||
def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty]>;
|
||||
def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty]>;
|
||||
|
||||
def int_nvvm_reflect : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
|
||||
def int_nvvm_reflect : NVVMBuiltin, PureIntrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
|
||||
|
||||
// isspacep.{const, global, local, shared}
|
||||
foreach space = ["const", "global", "local", "shared", "shared_cluster"] in
|
||||
def int_nvvm_isspacep_ # space : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
|
||||
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
// Environment register read
|
||||
foreach i = 0...31 in
|
||||
def int_nvvm_read_ptx_sreg_envreg # i : NVVMBuiltin,
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [],
|
||||
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
|
||||
PureIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
//
|
||||
// Texture Fetch
|
||||
//
|
||||
let IntrProperties = [IntrReadMem] in {
|
||||
let IntrProperties = [IntrReadMem, IntrNoCallback, IntrNoFree, IntrWillReturn] in {
|
||||
foreach is_unified = [true, false] in {
|
||||
defvar mode = !if(is_unified, "_unified", "");
|
||||
defvar addr_args = !if(is_unified, [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]);
|
||||
@ -1558,77 +1547,64 @@ let IntrProperties = [IntrReadMem] in {
|
||||
foreach is_array = [true, false] in {
|
||||
defvar array = !if(is_array, "_array", "");
|
||||
defvar array_args = !if(is_array, [llvm_i32_ty], []<LLVMType>);
|
||||
defvar base_args = !listconcat(addr_args, array_args);
|
||||
|
||||
def int_nvvm_tex # mode # _1d # array # _ # vec.Name # _s32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_i32_ty, 1))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_i32_ty, 1)>;
|
||||
def int_nvvm_tex # mode # _1d # array # _ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 1))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 1)>;
|
||||
def int_nvvm_tex # mode # _1d # array # _level_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 2))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 2)>;
|
||||
def int_nvvm_tex # mode # _1d # array # _grad_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 3))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 3)>;
|
||||
|
||||
def int_nvvm_tex # mode # _2d # array # _ # vec.Name # _s32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_i32_ty, 2))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_i32_ty, 2)>;
|
||||
def int_nvvm_tex # mode # _2d # array # _ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 2))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 2)>;
|
||||
def int_nvvm_tex # mode # _2d # array # _level_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 3))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 3)>;
|
||||
def int_nvvm_tex # mode # _2d # array # _grad_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 6))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 6)>;
|
||||
|
||||
if !not(is_array) then {
|
||||
def int_nvvm_tex # mode # _3d_ # vec.Name # _s32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, !listsplat(llvm_i32_ty, 3))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_i32_ty, 3)>;
|
||||
def int_nvvm_tex # mode # _3d_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, !listsplat(llvm_float_ty, 3))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 3)>;
|
||||
def int_nvvm_tex # mode # _3d_level_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, !listsplat(llvm_float_ty, 4))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 4)>;
|
||||
def int_nvvm_tex # mode # _3d_grad_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, !listsplat(llvm_float_ty, 9))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 9)>;
|
||||
}
|
||||
|
||||
def int_nvvm_tex # mode # _cube # array # _ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 3))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 3)>;
|
||||
def int_nvvm_tex # mode # _cube # array # _level_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 4))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 4)>;
|
||||
|
||||
if is_unified then
|
||||
def int_nvvm_tex # mode # _cube # array # _grad_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, array_args, !listsplat(llvm_float_ty, 9))>;
|
||||
: Intrinsic<vec.Types, base_args # !listsplat(llvm_float_ty, 9)>;
|
||||
} // is_array
|
||||
|
||||
foreach comp = ["r", "g", "b", "a"] in {
|
||||
def int_nvvm_tld4 # mode # _ # comp # _2d_ # vec.Name # _f32
|
||||
: Intrinsic<vec.Types,
|
||||
!listconcat(addr_args, !listsplat(llvm_float_ty, 2))>;
|
||||
: Intrinsic<vec.Types, addr_args # !listsplat(llvm_float_ty, 2)>;
|
||||
} // comp
|
||||
} // vec
|
||||
} // is_unified
|
||||
} // IntrProperties = [IntrReadMem]
|
||||
|
||||
//=== Surface Load
|
||||
let IntrProperties = [IntrReadMem] in {
|
||||
foreach clamp = ["clamp", "trap", "zero"] in {
|
||||
foreach vec = [TV_I8, TV_I16, TV_I32, TV_I64,
|
||||
TV_V2I8, TV_V2I16, TV_V2I32, TV_V2I64,
|
||||
TV_V4I8, TV_V4I16, TV_V4I32] in {
|
||||
|
||||
let IntrProperties = [IntrNoCallback, IntrNoFree, IntrReadMem]
|
||||
# !if(!ne(clamp, "trap"), [IntrWillReturn], []<IntrinsicProperty>) in {
|
||||
|
||||
def int_nvvm_suld_1d_ # vec.Name # _ # clamp
|
||||
: Intrinsic<vec.Types,
|
||||
[llvm_i64_ty, llvm_i32_ty]>;
|
||||
@ -1648,47 +1624,50 @@ let IntrProperties = [IntrReadMem] in {
|
||||
def int_nvvm_suld_3d_ # vec.Name # _ # clamp
|
||||
: Intrinsic<vec.Types,
|
||||
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
}
|
||||
} // vec
|
||||
} // clamp
|
||||
} // IntrProperties = [IntrReadMem]
|
||||
|
||||
//===- Texture Query ------------------------------------------------------===//
|
||||
|
||||
foreach query = ["channel_order", "channel_data_type", "width", "height",
|
||||
"depth", "array_size", "num_samples", "num_mipmap_levels"] in
|
||||
def int_nvvm_txq_ # query : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
//===- Surface Query ------------------------------------------------------===//
|
||||
|
||||
foreach query = ["channel_order", "channel_data_type", "width", "height",
|
||||
"depth", "array_size"] in
|
||||
def int_nvvm_suq_ # query : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
//===- Handle Query -------------------------------------------------------===//
|
||||
|
||||
foreach type = ["sampler", "surface", "texture"] in
|
||||
def int_nvvm_istypep_ # type : NVVMBuiltin,
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
//===- Surface Stores -----------------------------------------------------===//
|
||||
|
||||
multiclass SurfaceStoreIntrinsics<string clamp, TexVector vec> {
|
||||
let IntrProperties = [IntrNoCallback, IntrNoFree, IntrWriteMem] #
|
||||
!if(!ne(clamp, "trap"), [IntrWillReturn], []<IntrinsicProperty>) in {
|
||||
def _1d_ # vec.Name # _ # clamp : NVVMBuiltin,
|
||||
Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty], vec.Types)>;
|
||||
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty] # vec.Types>;
|
||||
|
||||
def _1d_array_ # vec.Name # _ # clamp : NVVMBuiltin,
|
||||
Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>;
|
||||
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty] # vec.Types>;
|
||||
|
||||
def _2d_ # vec.Name # _ # clamp : NVVMBuiltin,
|
||||
Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>;
|
||||
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty] # vec.Types>;
|
||||
|
||||
def _2d_array_ # vec.Name # _ # clamp : NVVMBuiltin,
|
||||
Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>;
|
||||
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty] # vec.Types>;
|
||||
|
||||
def _3d_ # vec.Name # _ # clamp : NVVMBuiltin,
|
||||
Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>;
|
||||
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty] # vec.Types>;
|
||||
}
|
||||
}
|
||||
|
||||
// Unformatted
|
||||
@ -1704,23 +1683,17 @@ foreach vec = [TV_I8, TV_I16, TV_I32,
|
||||
TV_V4I8, TV_V4I16, TV_V4I32] in
|
||||
defm int_nvvm_sust_p : SurfaceStoreIntrinsics<"trap", vec>;
|
||||
|
||||
//
|
||||
// Accessing special registers.
|
||||
|
||||
//
|
||||
class PTXReadSRegIntrinsicNB_r32<list<IntrinsicProperty> properties = []>
|
||||
: DefaultAttrsIntrinsic<[llvm_i32_ty], [],
|
||||
!listconcat([IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>], properties)>;
|
||||
: PureIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>] # properties>;
|
||||
|
||||
class PTXReadSRegIntrinsic_r32<list<IntrinsicProperty> properties = []>
|
||||
: PTXReadSRegIntrinsicNB_r32<properties>,
|
||||
NVVMBuiltin;
|
||||
: PTXReadSRegIntrinsicNB_r32<properties>, NVVMBuiltin;
|
||||
|
||||
multiclass PTXReadSRegIntrinsic_v4i32<list<list<IntrinsicProperty>> properties = [[], [], [], []]> {
|
||||
assert !eq(!size(properties), 4), "properties must be a list of 4 lists";
|
||||
// FIXME: Do we need the 128-bit integer type version?
|
||||
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
// FIXME: Enable this once v4i32 support is enabled in back-end.
|
||||
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
|
||||
defvar suffixes = ["_x", "_y", "_z", "_w"];
|
||||
foreach i = !range(suffixes) in
|
||||
def suffixes[i] : PTXReadSRegIntrinsic_r32<properties[i]>;
|
||||
@ -1737,30 +1710,20 @@ multiclass PTXReadSRegIntrinsicNB_v4i32<list<list<IntrinsicProperty>> properties
|
||||
|
||||
// Intrinsics to read registers with non-constant values. E.g. the values that
|
||||
// do change over the kernel lifetime. Such reads should not be CSE'd.
|
||||
class PTXReadNCSRegIntrinsic_r32
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
|
||||
NVVMBuiltin;
|
||||
class PTXReadNCSRegIntrinsic_r64
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
|
||||
class PTXReadNCSRegIntrinsic<LLVMType ty>
|
||||
: Intrinsic<[ty], [], [IntrInaccessibleMemOnly, IntrNoCallback,
|
||||
IntrNoFree, IntrWillReturn, NoUndef<RetIndex>]>,
|
||||
NVVMBuiltin;
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_tid
|
||||
: PTXReadSRegIntrinsic_v4i32<[[Range<RetIndex, 0, MAX_BLOCK_SIZE_X>],
|
||||
defvar MAX_BLOCK_ID_RANGE = [[Range<RetIndex, 0, MAX_BLOCK_SIZE_X>],
|
||||
[Range<RetIndex, 0, MAX_BLOCK_SIZE_Y>],
|
||||
[Range<RetIndex, 0, MAX_BLOCK_SIZE_Z>],
|
||||
[Range<RetIndex, 0, 1>]]>;
|
||||
[Range<RetIndex, 0, 1>]];
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_ntid
|
||||
: PTXReadSRegIntrinsic_v4i32<[[Range<RetIndex, 1, !add(MAX_BLOCK_SIZE_X, 1)>],
|
||||
defvar MAX_BLOCK_NID_RANGE = [[Range<RetIndex, 1, !add(MAX_BLOCK_SIZE_X, 1)>],
|
||||
[Range<RetIndex, 1, !add(MAX_BLOCK_SIZE_Y, 1)>],
|
||||
[Range<RetIndex, 1, !add(MAX_BLOCK_SIZE_Z, 1)>],
|
||||
[Range<RetIndex, 0, 1>]]>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_laneid
|
||||
: PTXReadSRegIntrinsic_r32<[Range<RetIndex, 0, WARP_SIZE>]>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32;
|
||||
[Range<RetIndex, 0, 1>]];
|
||||
|
||||
defvar MAX_GRID_ID_RANGE = [[Range<RetIndex, 0, MAX_GRID_SIZE_X>],
|
||||
[Range<RetIndex, 0, MAX_GRID_SIZE_Y>],
|
||||
@ -1772,11 +1735,17 @@ defvar MAX_GRID_NID_RANGE = [[Range<RetIndex, 1, !add(MAX_GRID_SIZE_X, 1)>],
|
||||
[Range<RetIndex, 1, !add(MAX_GRID_SIZE_Z, 1)>],
|
||||
[Range<RetIndex, 0, 1>]];
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_ctaid
|
||||
: PTXReadSRegIntrinsic_v4i32<MAX_GRID_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<MAX_BLOCK_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<MAX_BLOCK_NID_RANGE>;
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_nctaid
|
||||
: PTXReadSRegIntrinsic_v4i32<MAX_GRID_NID_RANGE>;
|
||||
def int_nvvm_read_ptx_sreg_laneid
|
||||
: PTXReadSRegIntrinsic_r32<[Range<RetIndex, 0, WARP_SIZE>]>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32;
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<MAX_GRID_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<MAX_GRID_NID_RANGE>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32;
|
||||
@ -1788,19 +1757,22 @@ def int_nvvm_read_ptx_sreg_lanemask_lt : PTXReadSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_ge : PTXReadSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_gt : PTXReadSRegIntrinsic_r32;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64;
|
||||
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
|
||||
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64;
|
||||
def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32;
|
||||
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
|
||||
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
|
||||
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
|
||||
def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_warpsize
|
||||
: PTXReadSRegIntrinsic_r32<[Range<RetIndex, WARP_SIZE, !add(WARP_SIZE, 1)>]>;
|
||||
|
||||
foreach i = 0...31 in
|
||||
def int_nvvm_read_ptx_sreg_envreg # i : PTXReadSRegIntrinsic_r32;
|
||||
|
||||
// sm90+, PTX7.8+
|
||||
|
||||
// Note: Since clusters are subdivisions of the grid, we conservatively use the
|
||||
@ -1808,14 +1780,10 @@ def int_nvvm_read_ptx_sreg_warpsize
|
||||
// practice, the clusterid will likely be much smaller. The CUDA programming
|
||||
// guide recommends 8 as a maximum portable value and H100s support 16.
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_clusterid
|
||||
: PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_nclusterid
|
||||
: PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_NID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_cluster_ctaid
|
||||
: PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_cluster_nctaid
|
||||
: PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_NID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_NID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_ID_RANGE>;
|
||||
defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32<MAX_GRID_NID_RANGE>;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
|
||||
def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
|
||||
@ -1843,13 +1811,13 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] i
|
||||
//
|
||||
// VOTE
|
||||
//
|
||||
|
||||
let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in {
|
||||
def int_nvvm_vote_all : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>;
|
||||
def int_nvvm_vote_any : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>;
|
||||
def int_nvvm_vote_uni : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>;
|
||||
def int_nvvm_vote_ballot : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i1_ty]>;
|
||||
}
|
||||
|
||||
//
|
||||
// VOTE.SYNC
|
||||
//
|
||||
@ -2052,8 +2020,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in {
|
||||
}
|
||||
|
||||
def int_nvvm_is_explicit_cluster
|
||||
: DefaultAttrsIntrinsic<[llvm_i1_ty], [],
|
||||
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
|
||||
: PureIntrinsic<[llvm_i1_ty], [], [NoUndef<RetIndex>],
|
||||
"llvm.nvvm.is_explicit_cluster">;
|
||||
|
||||
// Setmaxnreg inc/dec intrinsics
|
||||
@ -2458,13 +2425,12 @@ def int_nvvm_clusterlaunchcontrol_try_cancel_async_multicast_shared
|
||||
// clusterlaunchcontrol.query_cancel.is_canceled
|
||||
|
||||
def int_nvvm_clusterlaunchcontrol_query_cancel_is_canceled
|
||||
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i128_ty], [IntrNoMem, IntrSpeculatable],
|
||||
: PureIntrinsic<[llvm_i1_ty], [llvm_i128_ty], [],
|
||||
"llvm.nvvm.clusterlaunchcontrol.query_cancel.is_canceled">;
|
||||
|
||||
foreach dim = ["x", "y", "z"] in {
|
||||
foreach dim = ["x", "y", "z"] in
|
||||
def int_nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_ # dim
|
||||
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i128_ty], [IntrNoMem, IntrSpeculatable],
|
||||
: PureIntrinsic<[llvm_i32_ty], [llvm_i128_ty], [],
|
||||
"llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid." # dim>;
|
||||
}
|
||||
|
||||
} // let TargetPrefix = "nvvm"
|
||||
|
Loading…
x
Reference in New Issue
Block a user