[WIP][ROCDL] Added SWMMAC ops for gfx12 and gfx1250 (#181943)
This PR adds SWMMAC ops for the gfx12 and gfx1250 architectures.
This commit is contained in:
parent
ea6fee062d
commit
14ba1ece89
@ -1459,6 +1459,132 @@ def ROCDL_wmma_scale16_f32_16x16x128_f8f6f4 : ROCDL_WMMA_Scale_IntrOp<"wmma.scal
|
||||
def ROCDL_wmma_scale_f32_32x16x128_f4 : ROCDL_WMMA_Scale_F4_IntrOp<"wmma.scale.f32.32x16x128.f4", AnyInteger, F32, I32>;
|
||||
def ROCDL_wmma_scale16_f32_32x16x128_f4 : ROCDL_WMMA_Scale_F4_IntrOp<"wmma.scale16.f32.32x16x128.f4", AnyInteger, F32, I64>;
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SWMMAC intrinsics
|
||||
// SWMMAC op shape with SSA operands only: vector operands $a and $b (element
// type AB), a vector accumulator $c (element type CD) and an i32 $index.
// The result $res uses the same type constraint as $c.
//
// NOTE(review): the positional lists passed to ROCDL_IntrOp follow that
// class's parameter order, which is defined outside this excerpt. Here
// [0] / [0, 1, 3] appear to mark the result and operands $a, $b, $index as
// the overloaded types of the intrinsic -- confirm against ROCDL_IntrOp.
// NOTE(review): $index is fixed to I32 even though it is listed among the
// overloaded operands; confirm i32 is the index type the backend expects.
class ROCDL_SWMMAC_V0_IntrOp<string mnemonic, Type AB, Type CD>
    : ROCDL_IntrOp<mnemonic, [0], [0, 1, 3], [], 1, 0, 0, 0, [], []> {
  let arguments = (ins LLVM_VectorOf<AB>:$a,
                       LLVM_VectorOf<AB>:$b,
                       LLVM_VectorOf<CD>:$c,
                       I32:$index);
  let results = (outs LLVM_VectorOf<CD>:$res);
  let assemblyFormat =
      "$a `,` $b `,` $c `,` $index attr-dict `:` functional-type(operands, $res)";
}
|
||||
|
||||
// SWMMAC op shape with SSA operands only, where $a and $b may have different
// element types (A and B) and every data operand may be either a scalar or a
// vector. $c and the result $res share the element type CD; $index is i32.
//
// NOTE(review): the positional lists passed to ROCDL_IntrOp follow that
// class's parameter order, which is defined outside this excerpt -- confirm
// that [0] / [0, 1, 3] select the result, $a, $b and $index as the
// overloaded intrinsic types.
class ROCDL_SWMMAC_V1_IntrOp<string mnemonic, Type A, Type B, Type CD>
    : ROCDL_IntrOp<mnemonic, [0], [0, 1, 3], [], 1, 0, 0, 0, [], []> {
  let arguments = (ins LLVM_ScalarOrVectorOf<A>:$a,
                       LLVM_ScalarOrVectorOf<B>:$b,
                       LLVM_ScalarOrVectorOf<CD>:$c,
                       I32:$index);
  let results = (outs LLVM_ScalarOrVectorOf<CD>:$res);
  let assemblyFormat =
      "$a `,` $b `,` $c `,` $index attr-dict `:` functional-type(operands, $res)";
}
|
||||
|
||||
// Same operand layout as the plain A/B/CD SWMMAC shape ($a, $b, $c, $index),
// extended with two i1 attributes, reuseA and reuseB, that both default to 0.
// The attributes are not part of the custom syntax operands; they are
// parsed/printed through attr-dict.
//
// NOTE(review): [4, 5] paired with ["reuseA", "reuseB"] presumably places the
// two attributes as immediate arguments 4 and 5 of the intrinsic call, right
// after $index -- confirm against ROCDL_IntrOp, which is defined outside this
// excerpt.
class ROCDL_SWMMAC_V1_Reuse_IntrOp<string mnemonic, Type A, Type B, Type CD>
    : ROCDL_IntrOp<mnemonic, [0], [0, 1, 3], [], 1, 0, 0, 0,
                   [4, 5], ["reuseA", "reuseB"]> {
  let arguments = (ins LLVM_ScalarOrVectorOf<A>:$a,
                       LLVM_ScalarOrVectorOf<B>:$b,
                       LLVM_ScalarOrVectorOf<CD>:$c,
                       I32:$index,
                       DefaultValuedAttr<I1Attr, "0">:$reuseA,
                       DefaultValuedAttr<I1Attr, "0">:$reuseB);
  let results = (outs LLVM_ScalarOrVectorOf<CD>:$res);
  let assemblyFormat =
      "$a `,` $b `,` $c `,` $index attr-dict `:` functional-type(operands, $res)";
}
|
||||
|
||||
// SWMMAC op shape used by the integer (iu4 / iu8) ops: $a and $b share the
// element type AB, $c and the result share CD, and three i1 attributes
// (signA, signB, clamp) default to 0 and travel in attr-dict.
//
// The `ins` list interleaves attributes and operands in the order
// signA, $a, signB, $b, $c, $index, clamp.
// NOTE(review): [0, 2, 6] paired with the attribute names presumably gives
// the immediate-argument slots of signA, signB and clamp in the intrinsic
// call, and [1, 3, 5] the overloaded $a, $b and $index -- confirm against
// ROCDL_IntrOp, which is defined outside this excerpt.
class ROCDL_SWMMAC_IU_IntrOp<string mnemonic, Type AB, Type CD>
    : ROCDL_IntrOp<mnemonic, [0], [1, 3, 5], [], 1, 0, 0, 0,
                   [0, 2, 6], ["signA", "signB", "clamp"]> {
  let arguments = (ins DefaultValuedAttr<I1Attr, "0">:$signA,
                       LLVM_ScalarOrVectorOf<AB>:$a,
                       DefaultValuedAttr<I1Attr, "0">:$signB,
                       LLVM_ScalarOrVectorOf<AB>:$b,
                       LLVM_ScalarOrVectorOf<CD>:$c,
                       I32:$index,
                       DefaultValuedAttr<I1Attr, "0">:$clamp);
  let results = (outs LLVM_ScalarOrVectorOf<CD>:$res);
  let assemblyFormat =
      "$a `,` $b `,` $c `,` $index attr-dict `:` functional-type(operands, $res)";
}
|
||||
|
||||
// SWMMAC op shape with per-operand modifier attributes on A and B plus reuse
// hints: i1 attributes signA, signB, reuseA and reuseB (all defaulting to 0)
// accompany the SSA operands $a, $b (element type AB), $c (element type C)
// and the i32 $index. The result $res has element type D, which may differ
// from C at the ODS level.
//
// The `ins` list order is signA, $a, signB, $b, $c, $index, reuseA, reuseB.
// NOTE(review): [0, 2, 6, 7] presumably gives the immediate-argument slots of
// the four attributes and [1, 3, 5] the overloaded $a, $b and $index --
// confirm against ROCDL_IntrOp, which is defined outside this excerpt.
class ROCDL_SWMMAC_ModsAB_IntrOp<string mnemonic, Type AB, Type C, Type D>
    : ROCDL_IntrOp<mnemonic, [0], [1, 3, 5], [], 1, 0, 0, 0,
                   [0, 2, 6, 7], ["signA", "signB", "reuseA", "reuseB"]> {
  let arguments = (ins DefaultValuedAttr<I1Attr, "0">:$signA,
                       LLVM_ScalarOrVectorOf<AB>:$a,
                       DefaultValuedAttr<I1Attr, "0">:$signB,
                       LLVM_ScalarOrVectorOf<AB>:$b,
                       LLVM_ScalarOrVectorOf<C>:$c,
                       I32:$index,
                       DefaultValuedAttr<I1Attr, "0">:$reuseA,
                       DefaultValuedAttr<I1Attr, "0">:$reuseB);
  let results = (outs LLVM_ScalarOrVectorOf<D>:$res);
  let assemblyFormat =
      "$a `,` $b `,` $c `,` $index attr-dict `:` functional-type(operands, $res)";
}
|
||||
|
||||
// SWMMAC op shape with A/B modifier attributes, reuse hints and a clamp
// flag: i1 attributes signA, signB, reuseA, reuseB and clamp (all defaulting
// to 0) accompany the SSA operands $a, $b (element type AB), $c (element
// type C) and the i32 $index. The result $res has element type D.
//
// The `ins` list order is
//   signA, $a, signB, $b, $c, $index, reuseA, reuseB, clamp.
// NOTE(review): [0, 2, 6, 7, 8] presumably gives the immediate-argument slots
// of the five attributes and [1, 3, 5] the overloaded $a, $b and $index --
// confirm against ROCDL_IntrOp, which is defined outside this excerpt.
class ROCDL_SWMMAC_ModsABClamp_IntrOp<string mnemonic, Type AB, Type C, Type D>
    : ROCDL_IntrOp<mnemonic, [0], [1, 3, 5], [], 1, 0, 0, 0,
                   [0, 2, 6, 7, 8],
                   ["signA", "signB", "reuseA", "reuseB", "clamp"]> {
  let arguments = (ins DefaultValuedAttr<I1Attr, "0">:$signA,
                       LLVM_ScalarOrVectorOf<AB>:$a,
                       DefaultValuedAttr<I1Attr, "0">:$signB,
                       LLVM_ScalarOrVectorOf<AB>:$b,
                       LLVM_ScalarOrVectorOf<C>:$c,
                       I32:$index,
                       DefaultValuedAttr<I1Attr, "0">:$reuseA,
                       DefaultValuedAttr<I1Attr, "0">:$reuseB,
                       DefaultValuedAttr<I1Attr, "0">:$clamp);
  let results = (outs LLVM_ScalarOrVectorOf<D>:$res);
  let assemblyFormat =
      "$a `,` $b `,` $c `,` $index attr-dict `:` functional-type(operands, $res)";
}
|
||||
|
||||
// Available from gfx12

// f16 / bf16 inputs. The bf16 variants are declared with integer element
// types rather than BF16.
def ROCDL_swmmac_f32_16x16x32_f16
    : ROCDL_SWMMAC_V0_IntrOp<"swmmac.f32.16x16x32.f16", F16, F32>;
def ROCDL_swmmac_f32_16x16x32_bf16
    : ROCDL_SWMMAC_V0_IntrOp<"swmmac.f32.16x16x32.bf16", AnyInteger, F32>;
def ROCDL_swmmac_f16_16x16x32_f16
    : ROCDL_SWMMAC_V0_IntrOp<"swmmac.f16.16x16x32.f16", F16, F16>;
def ROCDL_swmmac_bf16_16x16x32_bf16
    : ROCDL_SWMMAC_V0_IntrOp<"swmmac.bf16.16x16x32.bf16",
                             AnyInteger, AnyInteger>;

// Integer inputs with signA / signB / clamp attributes.
def ROCDL_swmmac_i32_16x16x32_iu8
    : ROCDL_SWMMAC_IU_IntrOp<"swmmac.i32.16x16x32.iu8",
                             AnyInteger, AnyInteger>;
def ROCDL_swmmac_i32_16x16x32_iu4
    : ROCDL_SWMMAC_IU_IntrOp<"swmmac.i32.16x16x32.iu4",
                             AnyInteger, AnyInteger>;
def ROCDL_swmmac_i32_16x16x64_iu4
    : ROCDL_SWMMAC_IU_IntrOp<"swmmac.i32.16x16x64.iu4",
                             AnyInteger, AnyInteger>;

// fp8 / bf8 inputs (declared with integer element types), f32 accumulator.
def ROCDL_swmmac_f32_16x16x32_fp8_fp8
    : ROCDL_SWMMAC_V1_IntrOp<"swmmac.f32.16x16x32.fp8.fp8",
                             AnyInteger, AnyInteger, F32>;
def ROCDL_swmmac_f32_16x16x32_fp8_bf8
    : ROCDL_SWMMAC_V1_IntrOp<"swmmac.f32.16x16x32.fp8.bf8",
                             AnyInteger, AnyInteger, F32>;
def ROCDL_swmmac_f32_16x16x32_bf8_fp8
    : ROCDL_SWMMAC_V1_IntrOp<"swmmac.f32.16x16x32.bf8.fp8",
                             AnyInteger, AnyInteger, F32>;
def ROCDL_swmmac_f32_16x16x32_bf8_bf8
    : ROCDL_SWMMAC_V1_IntrOp<"swmmac.f32.16x16x32.bf8.bf8",
                             AnyInteger, AnyInteger, F32>;
|
||||
|
||||
// Available from gfx1250

// f16 / bf16 inputs with signA / signB / reuseA / reuseB attributes.
def ROCDL_swmmac_f32_16x16x64_f16
    : ROCDL_SWMMAC_ModsAB_IntrOp<"swmmac.f32.16x16x64.f16", F16, F32, F32>;
def ROCDL_swmmac_f32_16x16x64_bf16
    : ROCDL_SWMMAC_ModsAB_IntrOp<"swmmac.f32.16x16x64.bf16", BF16, F32, F32>;
def ROCDL_swmmac_f16_16x16x64_f16
    : ROCDL_SWMMAC_ModsAB_IntrOp<"swmmac.f16.16x16x64.f16", F16, F16, F16>;
def ROCDL_swmmac_bf16_16x16x64_bf16
    : ROCDL_SWMMAC_ModsAB_IntrOp<"swmmac.bf16.16x16x64.bf16",
                                 BF16, BF16, BF16>;
// NOTE(review): the "bf16f32" mnemonic hints at an f32 accumulator, yet C and
// D are both BF16 here -- confirm against the LLVM intrinsic definition.
def ROCDL_swmmac_bf16f32_16x16x64_bf16
    : ROCDL_SWMMAC_ModsAB_IntrOp<"swmmac.bf16f32.16x16x64.bf16",
                                 BF16, BF16, BF16>;

// fp8 / bf8 inputs (declared with integer element types), f32 accumulator,
// with reuseA / reuseB attributes.
def ROCDL_swmmac_f32_16x16x128_fp8_fp8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f32.16x16x128.fp8.fp8",
                                   AnyInteger, AnyInteger, F32>;
def ROCDL_swmmac_f32_16x16x128_fp8_bf8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f32.16x16x128.fp8.bf8",
                                   AnyInteger, AnyInteger, F32>;
def ROCDL_swmmac_f32_16x16x128_bf8_fp8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f32.16x16x128.bf8.fp8",
                                   AnyInteger, AnyInteger, F32>;
def ROCDL_swmmac_f32_16x16x128_bf8_bf8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f32.16x16x128.bf8.bf8",
                                   AnyInteger, AnyInteger, F32>;

// fp8 / bf8 inputs, f16 accumulator, with reuseA / reuseB attributes.
def ROCDL_swmmac_f16_16x16x128_fp8_fp8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f16.16x16x128.fp8.fp8",
                                   AnyInteger, AnyInteger, F16>;
def ROCDL_swmmac_f16_16x16x128_fp8_bf8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f16.16x16x128.fp8.bf8",
                                   AnyInteger, AnyInteger, F16>;
def ROCDL_swmmac_f16_16x16x128_bf8_fp8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f16.16x16x128.bf8.fp8",
                                   AnyInteger, AnyInteger, F16>;
def ROCDL_swmmac_f16_16x16x128_bf8_bf8
    : ROCDL_SWMMAC_V1_Reuse_IntrOp<"swmmac.f16.16x16x128.bf8.bf8",
                                   AnyInteger, AnyInteger, F16>;

// Integer inputs with signA / signB / reuseA / reuseB / clamp attributes.
def ROCDL_swmmac_i32_16x16x128_iu8
    : ROCDL_SWMMAC_ModsABClamp_IntrOp<"swmmac.i32.16x16x128.iu8",
                                      AnyInteger, AnyInteger, AnyInteger>;
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// LDS transpose intrinsics (available in GFX950)
|
||||
|
||||
|
||||
@ -1513,6 +1513,133 @@ llvm.func @rocdl_wmma_scale_ops(%a_f8: vector<8xi32>, %a_f4: vector<4xi32>, %c_f
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Round-trip test for the rocdl.swmmac.* ops: every op is written once with
// wave32-sized operands and, for the gfx12 ops, once more with wave64-sized
// operands. All attributes are given their default value, so the printed form
// checked below carries no attribute dictionary.
llvm.func @rocdl.swmmac(
    %v32f16 : vector<32xf16>, %v32bf16 : vector<32xbf16>,
    %v16f16 : vector<16xf16>, %v16bf16 : vector<16xbf16>,
    %v16i32 : vector<16xi32>, %v16i16 : vector<16xi16>,
    %v8f32 : vector<8xf32>, %v8i32 : vector<8xi32>, %v8f16 : vector<8xf16>,
    %v8bf16 : vector<8xbf16>, %v8i16 : vector<8xi16>,
    %v4f32 : vector<4xf32>, %v4f16 : vector<4xf16>, %v4i32 : vector<4xi32>,
    %v4i16 : vector<4xi16>,
    %v2i32 : vector<2xi32>, %v1i32 : i32, %index : i32) -> vector<8xf32> {

  // CHECK-LABEL: @rocdl.swmmac

  // Wave32 operand shapes.

  // CHECK: rocdl.swmmac.f32.16x16x32.f16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xf16>, vector<16xf16>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_0 = rocdl.swmmac.f32.16x16x32.f16 %v8f16, %v16f16, %v8f32, %index : (vector<8xf16>, vector<16xf16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi16>, vector<16xi16>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_1 = rocdl.swmmac.f32.16x16x32.bf16 %v8i16, %v16i16, %v8f32, %index : (vector<8xi16>, vector<16xi16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f16.16x16x32.f16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xf16>, vector<16xf16>, vector<8xf16>, i32) -> vector<8xf16>
  %res32_2 = rocdl.swmmac.f16.16x16x32.f16 %v8f16, %v16f16, %v8f16, %index : (vector<8xf16>, vector<16xf16>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: rocdl.swmmac.bf16.16x16x32.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi16>, vector<16xi16>, vector<8xi16>, i32) -> vector<8xi16>
  %res32_3 = rocdl.swmmac.bf16.16x16x32.bf16 %v8i16, %v16i16, %v8i16, %index : (vector<8xi16>, vector<16xi16>, vector<8xi16>, i32) -> vector<8xi16>

  // CHECK: rocdl.swmmac.i32.16x16x32.iu8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>
  %res32_4 = rocdl.swmmac.i32.16x16x32.iu8 %v2i32, %v4i32, %v8i32, %index {signA = false, signB = false, clamp = false} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // CHECK: rocdl.swmmac.i32.16x16x32.iu4 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<8xi32>, i32) -> vector<8xi32>
  %res32_5 = rocdl.swmmac.i32.16x16x32.iu4 %v1i32, %v2i32, %v8i32, %index {signA = false, signB = false, clamp = false} : (i32, vector<2xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // CHECK: rocdl.swmmac.i32.16x16x64.iu4 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>
  %res32_6 = rocdl.swmmac.i32.16x16x64.iu4 %v2i32, %v4i32, %v8i32, %index {signA = false, signB = false, clamp = false} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // CHECK: rocdl.swmmac.f32.16x16x32.fp8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_7 = rocdl.swmmac.f32.16x16x32.fp8.fp8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.fp8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_8 = rocdl.swmmac.f32.16x16x32.fp8.bf8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.bf8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_9 = rocdl.swmmac.f32.16x16x32.bf8.fp8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.bf8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_10 = rocdl.swmmac.f32.16x16x32.bf8.bf8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x64.f16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<16xf16>, vector<32xf16>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_11 = rocdl.swmmac.f32.16x16x64.f16 %v16f16, %v32f16, %v8f32, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xf16>, vector<32xf16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x64.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<16xbf16>, vector<32xbf16>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_12 = rocdl.swmmac.f32.16x16x64.bf16 %v16bf16, %v32bf16, %v8f32, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xbf16>, vector<32xbf16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f16.16x16x64.f16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<16xf16>, vector<32xf16>, vector<8xf16>, i32) -> vector<8xf16>
  %res32_13 = rocdl.swmmac.f16.16x16x64.f16 %v16f16, %v32f16, %v8f16, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xf16>, vector<32xf16>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: rocdl.swmmac.bf16.16x16x64.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<16xbf16>, vector<32xbf16>, vector<8xbf16>, i32) -> vector<8xbf16>
  %res32_14 = rocdl.swmmac.bf16.16x16x64.bf16 %v16bf16, %v32bf16, %v8bf16, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xbf16>, vector<32xbf16>, vector<8xbf16>, i32) -> vector<8xbf16>

  // CHECK: rocdl.swmmac.bf16f32.16x16x64.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<16xbf16>, vector<32xbf16>, vector<8xbf16>, i32) -> vector<8xbf16>
  %res32_15 = rocdl.swmmac.bf16f32.16x16x64.bf16 %v16bf16, %v32bf16, %v8bf16, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xbf16>, vector<32xbf16>, vector<8xbf16>, i32) -> vector<8xbf16>

  // CHECK: rocdl.swmmac.f32.16x16x128.fp8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_16 = rocdl.swmmac.f32.16x16x128.fp8.fp8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x128.fp8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_17 = rocdl.swmmac.f32.16x16x128.fp8.bf8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x128.bf8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_18 = rocdl.swmmac.f32.16x16x128.bf8.fp8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f32.16x16x128.bf8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>
  %res32_19 = rocdl.swmmac.f32.16x16x128.bf8.bf8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: rocdl.swmmac.f16.16x16x128.fp8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>
  %res32_20 = rocdl.swmmac.f16.16x16x128.fp8.fp8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: rocdl.swmmac.f16.16x16x128.fp8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>
  %res32_21 = rocdl.swmmac.f16.16x16x128.fp8.bf8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: rocdl.swmmac.f16.16x16x128.bf8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>
  %res32_22 = rocdl.swmmac.f16.16x16x128.bf8.fp8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: rocdl.swmmac.f16.16x16x128.bf8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>
  %res32_23 = rocdl.swmmac.f16.16x16x128.bf8.bf8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: rocdl.swmmac.i32.16x16x128.iu8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<8xi32>, vector<16xi32>, vector<8xi32>, i32) -> vector<8xi32>
  %res32_24 = rocdl.swmmac.i32.16x16x128.iu8 %v8i32, %v16i32, %v8i32, %index {signA = false, signB = false, reuseA = false, reuseB = false, clamp = false} : (vector<8xi32>, vector<16xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // Wave64 operand shapes.

  // CHECK: rocdl.swmmac.f32.16x16x32.f16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<4xf16>, vector<8xf16>, vector<4xf32>, i32) -> vector<4xf32>
  %res64_0 = rocdl.swmmac.f32.16x16x32.f16 %v4f16, %v8f16, %v4f32, %index : (vector<4xf16>, vector<8xf16>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<4xi16>, vector<8xi16>, vector<4xf32>, i32) -> vector<4xf32>
  %res64_1 = rocdl.swmmac.f32.16x16x32.bf16 %v4i16, %v8i16, %v4f32, %index : (vector<4xi16>, vector<8xi16>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: rocdl.swmmac.f16.16x16x32.f16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<4xf16>, vector<8xf16>, vector<4xf16>, i32) -> vector<4xf16>
  %res64_2 = rocdl.swmmac.f16.16x16x32.f16 %v4f16, %v8f16, %v4f16, %index : (vector<4xf16>, vector<8xf16>, vector<4xf16>, i32) -> vector<4xf16>

  // CHECK: rocdl.swmmac.bf16.16x16x32.bf16 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (vector<4xi16>, vector<8xi16>, vector<4xi16>, i32) -> vector<4xi16>
  %res64_3 = rocdl.swmmac.bf16.16x16x32.bf16 %v4i16, %v8i16, %v4i16, %index : (vector<4xi16>, vector<8xi16>, vector<4xi16>, i32) -> vector<4xi16>

  // CHECK: rocdl.swmmac.i32.16x16x32.iu8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<4xi32>, i32) -> vector<4xi32>
  %res64_4 = rocdl.swmmac.i32.16x16x32.iu8 %v1i32, %v2i32, %v4i32, %index {signA = false, signB = false, clamp = false} : (i32, vector<2xi32>, vector<4xi32>, i32) -> vector<4xi32>

  // CHECK: rocdl.swmmac.i32.16x16x32.iu4 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, i32, vector<4xi32>, i32) -> vector<4xi32>
  %res64_5 = rocdl.swmmac.i32.16x16x32.iu4 %v1i32, %v1i32, %v4i32, %index {signA = false, signB = false, clamp = false} : (i32, i32, vector<4xi32>, i32) -> vector<4xi32>

  // CHECK: rocdl.swmmac.i32.16x16x64.iu4 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<4xi32>, i32) -> vector<4xi32>
  %res64_6 = rocdl.swmmac.i32.16x16x64.iu4 %v1i32, %v2i32, %v4i32, %index {signA = false, signB = false, clamp = false} : (i32, vector<2xi32>, vector<4xi32>, i32) -> vector<4xi32>

  // CHECK: rocdl.swmmac.f32.16x16x32.fp8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>
  %res64_7 = rocdl.swmmac.f32.16x16x32.fp8.fp8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.fp8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>
  %res64_8 = rocdl.swmmac.f32.16x16x32.fp8.bf8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.bf8.fp8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>
  %res64_9 = rocdl.swmmac.f32.16x16x32.bf8.fp8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: rocdl.swmmac.f32.16x16x32.bf8.bf8 %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>
  %res64_10 = rocdl.swmmac.f32.16x16x32.bf8.bf8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  llvm.return %res32_0 : vector<8xf32>
}
|
||||
|
||||
|
||||
// -----
|
||||
|
||||
// expected-error@below {{attribute attached to unexpected op}}
|
||||
|
||||
@ -1170,6 +1170,131 @@ llvm.func @rocdl.wmma(%arg0 : vector<8xf32>, %arg1 : vector<16 x f16>, %arg2 : v
|
||||
llvm.return %r0 : vector<8xf32>
|
||||
}
|
||||
|
||||
|
||||
// Translation test for the rocdl.swmmac.* ops: each op must lower to a call
// of the matching llvm.amdgcn.swmmac.* intrinsic, with the mangled suffix
// built from the result, $a, $b and $index types and with every i1 attribute
// emitted as an immediate argument.
//
// Most cases use default (false) attributes. The "non-default" cases at the
// end of the wave32 section set some attributes to true, so an attribute
// routed to the wrong immediate slot is caught by the CHECK lines.
llvm.func @rocdl.swmmac(%v32f16 : vector<32xf16>, %v32bf16 : vector<32xbf16>,
                        %v16f16 : vector<16xf16>, %v16bf16 : vector<16xbf16>, %v16i32 : vector<16xi32>, %v16i16 : vector<16xi16>,
                        %v8f32 : vector<8xf32>, %v8i32 : vector<8xi32>, %v8f16 : vector<8xf16>, %v8bf16 : vector<8xbf16>, %v8i16 : vector<8xi16>,
                        %v4f32 : vector<4xf32>, %v4f16 : vector<4xf16>, %v4i32 : vector<4xi32>, %v4i16 : vector<4xi16>,
                        %v2i32 : vector<2xi32>, %v1i32 : i32, %index : i32) -> vector<8xf32> {

  // CHECK-LABEL: @rocdl.swmmac

  // ---- Wave32 -----

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> %{{.*}}, <16 x half> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}})
  %w32_0 = rocdl.swmmac.f32.16x16x32.f16 %v8f16, %v16f16, %v8f32, %index : (vector<8xf16>, vector<16xf16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> %{{.*}}, <16 x i16> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}})
  %w32_1 = rocdl.swmmac.f32.16x16x32.bf16 %v8i16, %v16i16, %v8f32, %index : (vector<8xi16>, vector<16xi16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> %{{.*}}, <16 x half> %{{.*}}, <8 x half> %{{.*}}, i32 %{{.*}})
  %w32_2 = rocdl.swmmac.f16.16x16x32.f16 %v8f16, %v16f16, %v8f16, %index : (vector<8xf16>, vector<16xf16>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> %{{.*}}, <16 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 %{{.*}})
  %w32_3 = rocdl.swmmac.bf16.16x16x32.bf16 %v8i16, %v16i16, %v8i16, %index : (vector<8xi16>, vector<16xi16>, vector<8xi16>, i32) -> vector<8xi16>

  // CHECK: call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 false, <2 x i32> %{{.*}}, i1 false, <4 x i32> %{{.*}}, <8 x i32> %{{.*}}, i32 %{{.*}}, i1 false)
  %w32_4 = rocdl.swmmac.i32.16x16x32.iu8 %v2i32, %v4i32, %v8i32, %index {signA = false, signB = false, clamp = false} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // CHECK: call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 false, i32 %{{.*}}, i1 false, <2 x i32> %{{.*}}, <8 x i32> %{{.*}}, i32 %{{.*}}, i1 false)
  %w32_5 = rocdl.swmmac.i32.16x16x32.iu4 %v1i32, %v2i32, %v8i32, %index {signA = false, signB = false, clamp = false} : (i32, vector<2xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // CHECK: call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 false, <2 x i32> %{{.*}}, i1 false, <4 x i32> %{{.*}}, <8 x i32> %{{.*}}, i32 %{{.*}}, i1 false)
  %w32_6 = rocdl.swmmac.i32.16x16x64.iu4 %v2i32, %v4i32, %v8i32, %index {signA = false, signB = false, clamp = false} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}})
  %w32_7 = rocdl.swmmac.f32.16x16x32.fp8.fp8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}})
  %w32_8 = rocdl.swmmac.f32.16x16x32.fp8.bf8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}})
  %w32_9 = rocdl.swmmac.f32.16x16x32.bf8.fp8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}})
  %w32_10 = rocdl.swmmac.f32.16x16x32.bf8.bf8 %v2i32, %v4i32, %v8f32, %index : (vector<2xi32>, vector<4xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x64.f16.v8f32.v16f16.v32f16.i32(i1 false, <16 x half> %{{.*}}, i1 false, <32 x half> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_11 = rocdl.swmmac.f32.16x16x64.f16 %v16f16, %v32f16, %v8f32, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xf16>, vector<32xf16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x64.bf16.v8f32.v16bf16.v32bf16.i32(i1 false, <16 x bfloat> %{{.*}}, i1 false, <32 x bfloat> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_12 = rocdl.swmmac.f32.16x16x64.bf16 %v16bf16, %v32bf16, %v8f32, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xbf16>, vector<32xbf16>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x64.f16.v8f16.v16f16.v32f16.i32(i1 false, <16 x half> %{{.*}}, i1 false, <32 x half> %{{.*}}, <8 x half> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_13 = rocdl.swmmac.f16.16x16x64.f16 %v16f16, %v32f16, %v8f16, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xf16>, vector<32xf16>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: call <8 x bfloat> @llvm.amdgcn.swmmac.bf16.16x16x64.bf16.v8bf16.v16bf16.v32bf16.i32(i1 false, <16 x bfloat> %{{.*}}, i1 false, <32 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_14 = rocdl.swmmac.bf16.16x16x64.bf16 %v16bf16, %v32bf16, %v8bf16, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xbf16>, vector<32xbf16>, vector<8xbf16>, i32) -> vector<8xbf16>

  // CHECK: call <8 x bfloat> @llvm.amdgcn.swmmac.bf16f32.16x16x64.bf16.v8bf16.v16bf16.v32bf16.i32(i1 false, <16 x bfloat> %{{.*}}, i1 false, <32 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_15 = rocdl.swmmac.bf16f32.16x16x64.bf16 %v16bf16, %v32bf16, %v8bf16, %index {signA = false, signB = false, reuseA = false, reuseB = false} : (vector<16xbf16>, vector<32xbf16>, vector<8xbf16>, i32) -> vector<8xbf16>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.fp8.fp8.v8f32.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_16 = rocdl.swmmac.f32.16x16x128.fp8.fp8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.fp8.bf8.v8f32.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_17 = rocdl.swmmac.f32.16x16x128.fp8.bf8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.bf8.fp8.v8f32.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_18 = rocdl.swmmac.f32.16x16x128.bf8.fp8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.bf8.bf8.v8f32.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_19 = rocdl.swmmac.f32.16x16x128.bf8.bf8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // CHECK: call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.fp8.fp8.v8f16.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x half> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_20 = rocdl.swmmac.f16.16x16x128.fp8.fp8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.fp8.bf8.v8f16.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x half> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_21 = rocdl.swmmac.f16.16x16x128.fp8.bf8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.bf8.fp8.v8f16.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x half> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_22 = rocdl.swmmac.f16.16x16x128.bf8.fp8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.bf8.bf8.v8f16.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x half> %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
  %w32_23 = rocdl.swmmac.f16.16x16x128.bf8.bf8 %v8i32, %v16i32, %v8f16, %index {reuseA = false, reuseB = false} : (vector<8xi32>, vector<16xi32>, vector<8xf16>, i32) -> vector<8xf16>

  // CHECK: call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> %{{.*}}, i1 false, <16 x i32> %{{.*}}, <8 x i32> %{{.*}}, i32 %{{.*}}, i1 false, i1 false, i1 false)
  %w32_24 = rocdl.swmmac.i32.16x16x128.iu8 %v8i32, %v16i32, %v8i32, %index {signA = false, signB = false, reuseA = false, reuseB = false, clamp = false} : (vector<8xi32>, vector<16xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // ---- Wave32, non-default attributes -----
  // Each case sets a different subset of the i1 attributes, so a swapped or
  // misplaced immediate argument shows up as a CHECK mismatch.

  // signA -> first argument, clamp -> last argument.
  // CHECK: call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> %{{.*}}, i1 false, <4 x i32> %{{.*}}, <8 x i32> %{{.*}}, i32 %{{.*}}, i1 true)
  %w32_25 = rocdl.swmmac.i32.16x16x32.iu8 %v2i32, %v4i32, %v8i32, %index {signA = true, signB = false, clamp = true} : (vector<2xi32>, vector<4xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // reuseB -> last argument, reuseA stays false.
  // CHECK: call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.fp8.fp8.v8f32.v8i32.v16i32.i32(<8 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x float> %{{.*}}, i32 %{{.*}}, i1 false, i1 true)
  %w32_26 = rocdl.swmmac.f32.16x16x128.fp8.fp8 %v8i32, %v16i32, %v8f32, %index {reuseA = false, reuseB = true} : (vector<8xi32>, vector<16xi32>, vector<8xf32>, i32) -> vector<8xf32>

  // signB, reuseA and clamp set; signA and reuseB stay false.
  // CHECK: call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> %{{.*}}, i1 true, <16 x i32> %{{.*}}, <8 x i32> %{{.*}}, i32 %{{.*}}, i1 true, i1 false, i1 true)
  %w32_27 = rocdl.swmmac.i32.16x16x128.iu8 %v8i32, %v16i32, %v8i32, %index {signA = false, signB = true, reuseA = true, reuseB = false, clamp = true} : (vector<8xi32>, vector<16xi32>, vector<8xi32>, i32) -> vector<8xi32>

  // ---- Wave64 -----

  // CHECK: call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> %{{.*}}, <8 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}})
  %w64_0 = rocdl.swmmac.f32.16x16x32.f16 %v4f16, %v8f16, %v4f32, %index : (vector<4xf16>, vector<8xf16>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}})
  %w64_1 = rocdl.swmmac.f32.16x16x32.bf16 %v4i16, %v8i16, %v4f32, %index : (vector<4xi16>, vector<8xi16>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> %{{.*}}, <8 x half> %{{.*}}, <4 x half> %{{.*}}, i32 %{{.*}})
  %w64_2 = rocdl.swmmac.f16.16x16x32.f16 %v4f16, %v8f16, %v4f16, %index : (vector<4xf16>, vector<8xf16>, vector<4xf16>, i32) -> vector<4xf16>

  // CHECK: call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i16> %{{.*}}, i32 %{{.*}})
  %w64_3 = rocdl.swmmac.bf16.16x16x32.bf16 %v4i16, %v8i16, %v4i16, %index : (vector<4xi16>, vector<8xi16>, vector<4xi16>, i32) -> vector<4xi16>

  // CHECK: call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 false, i32 %{{.*}}, i1 false, <2 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i1 false)
  %w64_4 = rocdl.swmmac.i32.16x16x32.iu8 %v1i32, %v2i32, %v4i32, %index {signA = false, signB = false, clamp = false} : (i32, vector<2xi32>, vector<4xi32>, i32) -> vector<4xi32>

  // CHECK: call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 false, i32 %{{.*}}, i1 false, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i1 false)
  %w64_5 = rocdl.swmmac.i32.16x16x32.iu4 %v1i32, %v1i32, %v4i32, %index {signA = false, signB = false, clamp = false} : (i32, i32, vector<4xi32>, i32) -> vector<4xi32>

  // CHECK: call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 false, i32 %{{.*}}, i1 false, <2 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i1 false)
  %w64_6 = rocdl.swmmac.i32.16x16x64.iu4 %v1i32, %v2i32, %v4i32, %index {signA = false, signB = false, clamp = false} : (i32, vector<2xi32>, vector<4xi32>, i32) -> vector<4xi32>

  // CHECK: call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 %{{.*}}, <2 x i32> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}})
  %w64_7 = rocdl.swmmac.f32.16x16x32.fp8.fp8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 %{{.*}}, <2 x i32> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}})
  %w64_8 = rocdl.swmmac.f32.16x16x32.fp8.bf8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 %{{.*}}, <2 x i32> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}})
  %w64_9 = rocdl.swmmac.f32.16x16x32.bf8.fp8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  // CHECK: call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 %{{.*}}, <2 x i32> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}})
  %w64_10 = rocdl.swmmac.f32.16x16x32.bf8.bf8 %v1i32, %v2i32, %v4f32, %index : (i32, vector<2xi32>, vector<4xf32>, i32) -> vector<4xf32>

  llvm.return %w32_0 : vector<8xf32>
}
|
||||
|
||||
llvm.func @rocdl.ds.read.tr(%ptr : !llvm.ptr<3>) -> vector<4xf16> {
|
||||
// CHECK-LABEL: rocdl.ds.read.tr
|
||||
// CHECK: call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %0)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user