[AMDGPU] Add target features for SWMMAC instructions (#185785)

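Introduce the `swmmac-gfx1200-insts` and `swmmac-gfx1250-insts` target features and gate the SWMMAC builtins on them instead of `wmma-128b-insts` and `gfx1250-insts`.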
This commit is contained in:
Chinmay Deshpande 2026-03-18 13:52:34 -07:00 committed by GitHub
parent 3de7814b8d
commit e044c4ad81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 156 additions and 38 deletions

View File

@ -828,29 +828,29 @@ def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12 : AMDGPUBuiltin<"_ExtVector
let ArgNames = ["a_sign", "a", "b_sign", "b", "c", "clamp"];
}
def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, __fp16>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, short>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
// SWMMAC builtins, wave32 forms (names end in _w32).
// The last AMDGPUBuiltin argument is the required-feature string; every entry
// in this group needs "swmmac-gfx1200-insts,wavefrontsize32". Clang echoes that
// string in its "needs target feature ..." error when the target lacks it.
def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, __fp16>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, short>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, __fp16>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, short>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
// SWMMAC builtins, wave64 forms (names end in _w64).
// Same operations as the _w32 group but with the vector operand/result types
// declared here; every entry needs "swmmac-gfx1200-insts,wavefrontsize64".
def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, __fp16>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, short>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
def __builtin_amdgcn_prng_b32 : AMDGPUBuiltin<"unsigned int(unsigned int)", [Const], "prng-inst">;
def __builtin_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUBuiltin<"_ExtVector<6, unsigned int>(_ExtVector<32, _Float16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
@ -1170,20 +1170,20 @@ def __builtin_amdgcn_wmma_scale16_f32_32x16x128_f4 : AMDGPUBuiltin<"_ExtVector<1
let Documentation = [DocWMMA_scale16_GFX1250];
let ArgNames = ["a", "b", "c_mod", "c", "matrix_a_scale", "matrix_a_scale_fmt", "matrix_a_scale_exp", "matrix_b_scale", "matrix_b_scale_fmt", "matrix_b_scale_exp", "matrix_a_reuse", "matrix_b_reuse"];
}
def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, __bf16>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<8, int>, _Constant bool, _ExtVector<16, int>, _ExtVector<8, int>, _ExtVector<2, int>, _Constant bool, _Constant bool, ...)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, _Float16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
// SWMMAC builtins gated on "swmmac-gfx1250-insts,wavefrontsize32".
// Unlike the _w32/_w64 groups, these names carry no wave-size suffix; all of
// them are declared for wavefrontsize32 only.
// NOTE(review): the iu8 prototype ends in "..." while the others have a fixed
// argument list -- presumably to allow an optional trailing argument; confirm.
def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, __bf16>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<8, int>, _Constant bool, _ExtVector<16, int>, _ExtVector<8, int>, _ExtVector<2, int>, _Constant bool, _Constant bool, ...)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, _Float16>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
// GFX12.5 128B cooperative atomics
def __builtin_amdgcn_cooperative_atomic_load_32x4B : AMDGPUBuiltin<"int(int *, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;

View File

@ -0,0 +1,36 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \
// RUN: -verify -S -o - %s
typedef float v8f __attribute__((ext_vector_type(8)));
typedef half v8h __attribute__((ext_vector_type(8)));
typedef half v16h __attribute__((ext_vector_type(16)));
typedef half v32h __attribute__((ext_vector_type(32)));
typedef __bf16 v8bf16 __attribute__((ext_vector_type(8)));
typedef __bf16 v16bf16 __attribute__((ext_vector_type(16)));
typedef __bf16 v32bf16 __attribute__((ext_vector_type(32)));
typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v8i __attribute__((ext_vector_type(8)));
typedef int v16i __attribute__((ext_vector_type(16)));
// Negative test: this file is compiled for gfx1030 with -verify, so every
// gfx1250 SWMMAC builtin call below must be rejected. Each call carries an
// expected-error directive on the same line naming the builtin and the feature
// string it requires ("swmmac-gfx1250-insts,wavefrontsize32").
// The out* pointers only give each call's result somewhere to go; a*/b*/c*
// supply operands of the vector types each builtin's prototype expects.
void test_amdgcn_swmmac_gfx1250(global v8f* out8f, global v8h* out8h, global v8bf16* out8bf16, global v8i* out8i,
v16bf16 a16bf16, v16h a16h, v8i a8i,
v32bf16 b32bf16, v32h b32h, v16i b16i,
v8f c8f, v8bf16 c8bf16, v8h c8h, v8i c8i,
int index, v2i index2)
{
*out8f = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a16bf16, 0, b32bf16, c8f, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8bf16 = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a16bf16, 0, b32bf16, c8bf16, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a16bf16, 0, b32bf16, c8f, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8h = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8h = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8h = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8h = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8i = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a8i, 0, b16i, c8i, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a16h, 0, b32h, c8f, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
*out8h = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a16h, 0, b32h, c8h, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}}
}

View File

@ -0,0 +1,31 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \
// RUN: -verify -S -o - %s
typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));
typedef float v8f __attribute__((ext_vector_type(8)));
typedef half v8h __attribute__((ext_vector_type(8)));
typedef short v8s __attribute__((ext_vector_type(8)));
typedef int v8i __attribute__((ext_vector_type(8)));
typedef half v16h __attribute__((ext_vector_type(16)));
typedef short v16s __attribute__((ext_vector_type(16)));
// Negative test: this file is compiled for gfx1030 with -verify, so every
// wave32 SWMMAC builtin call below must be rejected. Each call carries an
// expected-error directive on the same line naming the builtin and the feature
// string it requires ("swmmac-gfx1200-insts,wavefrontsize32").
// The out* pointers only give each call's result somewhere to go; a*/b*/c*
// supply operands of the vector types each builtin's prototype expects.
void test_amdgcn_swmmac_w32(global v8f* out8f, global v8h* out8h, global v8s* out8s, global v8i* out8i,
v8h a8h, v8s a8s, v2i a2i, int ai,
v16h b16h, v16s b16s, v4i b4i, v2i b2i,
v8f c8f, v8h c8h, v8s c8s, v8i c8i,
int index)
{
*out8f = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a8h, b16h, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a8s, b16s, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8h = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a8h, b16h, c8h, index); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8s = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a8s, b16s, c8s, index); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8i = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a2i, true, b4i, c8i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8i = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, ai, true, b2i, c8i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8i = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a2i, true, b4i, c8i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
*out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
}

View File

@ -0,0 +1,30 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \
// RUN: -verify -S -o - %s
typedef int v2i __attribute__((ext_vector_type(2)));
typedef float v4f __attribute__((ext_vector_type(4)));
typedef half v4h __attribute__((ext_vector_type(4)));
typedef short v4s __attribute__((ext_vector_type(4)));
typedef int v4i __attribute__((ext_vector_type(4)));
typedef half v8h __attribute__((ext_vector_type(8)));
typedef short v8s __attribute__((ext_vector_type(8)));
// Negative test: this file is compiled for gfx1030 with -verify, so every
// wave64 SWMMAC builtin call below must be rejected. Each call carries an
// expected-error directive on the same line naming the builtin and the feature
// string it requires ("swmmac-gfx1200-insts,wavefrontsize64").
// The out* pointers only give each call's result somewhere to go; a*/b*/c*
// supply operands of the vector types each builtin's prototype expects.
void test_amdgcn_swmmac_w64(global v4f* out4f, global v4h* out4h, global v4s* out4s, global v4i* out4i,
v4h a4h, v4s a4s, int ai,
v8h b8h, v8s b8s, v2i b2i, int bi,
v4f c4f, v4h c4h, v4s c4s, v4i c4i,
int index)
{
*out4f = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a4h, b8h, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a4s, b8s, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4h = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a4h, b8h, c4h, index); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4s = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a4s, b8s, c4s, index); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4i = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, ai, true, b2i, c4i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4i = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, ai, true, bi, c4i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4i = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, ai, true, b2i, c4i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
*out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
}

View File

@ -814,6 +814,14 @@ defm WMMA128bInsts : AMDGPUSubtargetFeature<"wmma-128b-insts",
"Has WMMA instructions where A and B matrices do not have duplicated data"
>;
// Subtarget feature "swmmac-gfx1200-insts". The same string appears in the
// clang builtin definitions for the *_w32 / *_w64 SWMMAC builtins.
// NOTE(review): presumably this defm is what provides the
// FeatureSWMMACGfx1200Insts and HasSWMMACGfx1200Insts names used elsewhere in
// the backend -- confirm against the AMDGPUSubtargetFeature multiclass.
defm SWMMACGfx1200Insts : AMDGPUSubtargetFeature<"swmmac-gfx1200-insts",
"Has GFX1200 SWMMAC instructions"
>;
// Subtarget feature "swmmac-gfx1250-insts". The same string appears in the
// clang builtin definitions for the SWMMAC builtins that have no wave-size
// suffix (16x16x64 / 16x16x128 shapes).
// NOTE(review): presumably this defm is what provides the
// FeatureSWMMACGfx1250Insts name listed in the ISA feature sets -- confirm
// against the AMDGPUSubtargetFeature multiclass.
defm SWMMACGfx1250Insts : AMDGPUSubtargetFeature<"swmmac-gfx1250-insts",
"Has GFX1250 SWMMAC instructions"
>;
defm PkFmacF16Inst : AMDGPUSubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
@ -1950,6 +1958,7 @@ def FeatureISAVersion11_7_0 : FeatureSet<
FeatureFP8ConversionInsts,
FeatureDot11Insts,
FeatureWMMA128bInsts,
FeatureSWMMACGfx1200Insts,
FeatureIEEEMinimumMaximumInsts,
FeatureMinimum3Maximum3F32,
FeatureMinimum3Maximum3F16])>;
@ -1983,6 +1992,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureExtendedImageInsts,
FeatureFP8ConversionInsts,
FeatureWMMA128bInsts,
FeatureSWMMACGfx1200Insts,
FeatureIEEEMinimumMaximumInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
@ -2080,7 +2090,9 @@ def FeatureISAVersion12_50_Common : FeatureSet<
FeatureXNACK,
FeatureClusters,
FeatureD16Writes32BitVgpr,
FeatureMcastLoadInsts
FeatureMcastLoadInsts,
FeatureSWMMACGfx1200Insts,
FeatureSWMMACGfx1250Insts
]>;
def FeatureISAVersion12_50 : FeatureSet<

View File

@ -2061,7 +2061,9 @@ let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX11PlusNot12_50, Othe
defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>;
defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>;
defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w32>;
}
let WaveSizePredicate = isWave32, SubtargetPredicate = HasSWMMACGfx1200Insts in {
def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w32>;
def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w32>;
def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w32>;
@ -2088,7 +2090,9 @@ let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX11PlusNot12_50, Othe
defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>;
defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>;
defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w64>;
}
let WaveSizePredicate = isWave64, SubtargetPredicate = HasSWMMACGfx1200Insts in {
def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w64>;
def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w64>;
def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w64>;

View File

@ -249,10 +249,13 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
StringMap<bool> &Features) {
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
switch (Kind) {
case GK_GFX1310:
case GK_GFX1251:
case GK_GFX1250:
case GK_GFX12_5_GENERIC:
Features["swmmac-gfx1200-insts"] = true;
Features["swmmac-gfx1250-insts"] = true;
[[fallthrough]];
case GK_GFX1310:
Features["ci-insts"] = true;
Features["dot7-insts"] = true;
Features["dot8-insts"] = true;
@ -333,6 +336,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["cvt-pknorm-vop2-insts"] = true;
Features["fp8-conversion-insts"] = true;
Features["wmma-128b-insts"] = true;
Features["swmmac-gfx1200-insts"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
break;
case GK_GFX1170:
@ -361,6 +365,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["dot11-insts"] = true;
Features["fp8-conversion-insts"] = true;
Features["wmma-128b-insts"] = true;
Features["swmmac-gfx1200-insts"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
break;
case GK_GFX1153: