From bfb6f47e9ea463555833934ef714b03ee78eb01e Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 28 Nov 2022 11:43:51 +0000 Subject: [PATCH] [SVE] Change some bfloat lane intrinsics to use i32 immediates Almost all of the other SVE LLVM IR intrinsics take i32 values for lane indices or other immediates. We should bring the bfloat intrinsics in line with that. It will also make it easier to add support for the SVE2.1 float intrinsics in future, since they reuse the same underlying instruction classes. I've maintained backwards compatibility with the old i64 variants and used the autoupgrade mechanism. Differential Revision: https://reviews.llvm.org/D138788 --- clang/include/clang/Basic/arm_sve.td | 6 +-- .../aarch64-sve-intrinsics/acle_sve_bfdot.c | 8 ++-- .../aarch64-sve-intrinsics/acle_sve_bfmlalb.c | 8 ++-- .../aarch64-sve-intrinsics/acle_sve_bfmlalt.c | 8 ++-- llvm/include/llvm/IR/IntrinsicsAArch64.td | 8 ++-- llvm/lib/IR/AutoUpgrade.cpp | 25 ++++++++++ .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 ++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 20 ++++---- .../Bitcode/upgrade-aarch64-sve-intrinsics.ll | 30 ++++++++++++ .../CodeGen/AArch64/sve-intrinsics-bfloat.ll | 46 +++++++++---------- 10 files changed, 111 insertions(+), 56 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 175b572ffdab..6c24f0423238 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -537,9 +537,9 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>; def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>; def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>; - def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfdot_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; - def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; - def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; + def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c index 7735a3173d38..454b4b546a9d 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c @@ -31,12 +31,12 @@ svfloat32_t test_bfdot_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { // CHECK-LABEL: @test_bfdot_lane_0_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 0) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 0) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 0) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { @@ -45,12 +45,12 @@ svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) // CHECK-LABEL: @test_bfdot_lane_3_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 3) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 3) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_3_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 3) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfdot.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 3) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c index 0727ffbb93cf..c736cf8104c9 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c @@ -31,12 +31,12 @@ svfloat32_t test_svbfmlalb_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { // CHECK-LABEL: @test_bfmlalb_lane_0_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 0) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 0) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 0) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { @@ -45,12 +45,12 @@ svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t // CHECK-LABEL: @test_bfmlalb_lane_7_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 7) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 7) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 7) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalb.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c index 24e2da9aa470..7888043e820f 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c @@ -31,12 +31,12 @@ svfloat32_t test_svbfmlalt_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { // CHECK-LABEL: @test_bfmlalt_lane_0_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 0) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 0) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 0) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { @@ -45,12 +45,12 @@ svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t // CHECK-LABEL: @test_bfmlalt_lane_7_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 7) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 7) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i64 7) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.bfmlalt.lane.v2( [[X:%.*]], [[Y:%.*]], [[Z:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 960200ddf9d9..c65446e8bafa 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1516,7 +1516,7 @@ class SVE_4Vec_BF16 class SVE_4Vec_BF16_Indexed : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty], - [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty], + [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; // @@ -2519,9 +2519,9 @@ def int_aarch64_sve_bfmlalt : SVE_4Vec_BF16; def int_aarch64_sve_bfmmla : SVE_4Vec_BF16; -def int_aarch64_sve_bfdot_lane : SVE_4Vec_BF16_Indexed; -def int_aarch64_sve_bfmlalb_lane : SVE_4Vec_BF16_Indexed; -def int_aarch64_sve_bfmlalt_lane : SVE_4Vec_BF16_Indexed; +def int_aarch64_sve_bfdot_lane_v2 : SVE_4Vec_BF16_Indexed; +def int_aarch64_sve_bfmlalb_lane_v2 : SVE_4Vec_BF16_Indexed; +def int_aarch64_sve_bfmlalt_lane_v2 : SVE_4Vec_BF16_Indexed; } // diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index b5c7818dfae2..ef4aac5b0cde 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -605,6 +605,21 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { F->arg_begin()->getType()); return true; } + if (Name == "aarch64.sve.bfdot.lane") { + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::aarch64_sve_bfdot_lane_v2); + return true; + } + if (Name == "aarch64.sve.bfmlalb.lane") { + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::aarch64_sve_bfmlalb_lane_v2); + return true; + } + if (Name == "aarch64.sve.bfmlalt.lane") { + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::aarch64_sve_bfmlalt_lane_v2); + return true; + } static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)"); if (LdRegex.match(Name)) { Type *ScalarTy = @@ -3955,6 +3970,16 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { NewCall = Builder.CreateCall(NewFn, Args); break; } + case Intrinsic::aarch64_sve_bfmlalb_lane_v2: + case Intrinsic::aarch64_sve_bfmlalt_lane_v2: + case Intrinsic::aarch64_sve_bfdot_lane_v2: { + LLVMContext &Ctx = F->getParent()->getContext(); + SmallVector Args(CI->args()); + Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx), + cast(Args[3])->getZExtValue()); + NewCall = Builder.CreateCall(NewFn, Args); + break; + } case Intrinsic::aarch64_sve_ld3_sret: case Intrinsic::aarch64_sve_ld4_sret: case Intrinsic::aarch64_sve_ld2_sret: { diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 597c7a27cf33..7cc525e698f5 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2162,8 +2162,8 @@ let Predicates = [HasSVEorSME] in { } // End HasSVEorSME let Predicates = [HasBF16, HasSVEorSME] in { - defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>; - defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>; + defm BFDOT_ZZZ : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; + defm BFDOT_ZZI : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; } // End HasBF16, HasSVEorSME let Predicates = [HasBF16, HasSVE] in { @@ -2173,8 +2173,8 @@ let Predicates = [HasBF16, HasSVE] in { let Predicates = [HasBF16, HasSVEorSME] in { defm BFMLALB_ZZZ : sve_bfloat_matmul_longvecl<0b0, 0b0, "bfmlalb", int_aarch64_sve_bfmlalb>; defm BFMLALT_ZZZ : sve_bfloat_matmul_longvecl<0b1, 0b0, "bfmlalt", int_aarch64_sve_bfmlalt>; - defm BFMLALB_ZZZI : sve_bfloat_matmul_longvecl_idx<0b0, 0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>; - defm BFMLALT_ZZZI : sve_bfloat_matmul_longvecl_idx<0b1, 0b0, "bfmlalt", int_aarch64_sve_bfmlalt_lane>; + defm BFMLALB_ZZZI : sve_bfloat_matmul_longvecl_idx<0b0, 0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane_v2>; + defm BFMLALT_ZZZI : sve_bfloat_matmul_longvecl_idx<0b1, 0b0, "bfmlalt", int_aarch64_sve_bfmlalt_lane_v2>; defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>; defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>; } // End HasBF16, HasSVEorSME diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index cfccbd766709..a0fa88200d95 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -8317,13 +8317,13 @@ class sve_float_dot let DestructiveInstType = DestructiveOther; } -multiclass sve_bfloat_dot { - def NAME : sve_float_dot<0b1, asm>; - def : SVE_3_Op_Pat(NAME)>; +multiclass sve_float_dot { + def NAME : sve_float_dot; + def : SVE_3_Op_Pat(NAME)>; } class sve_float_dot_indexed -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop), +: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop), asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; @@ -8342,9 +8342,9 @@ class sve_float_dot_indexed let DestructiveInstType = DestructiveOther; } -multiclass sve_bfloat_dot_indexed { - def NAME : sve_float_dot_indexed<0b1, asm>; - def : SVE_4_Op_Imm_Pat(NAME)>; +multiclass sve_float_dot_indexed { + def NAME : sve_float_dot_indexed; + def : SVE_4_Op_Imm_Pat(NAME)>; } class sve_bfloat_matmul @@ -8383,7 +8383,7 @@ multiclass sve_bfloat_matmul_longvecl -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop), +: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH32b:$iop), asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; @@ -8407,7 +8407,7 @@ class sve_bfloat_matmul_longvecl_idx multiclass sve_bfloat_matmul_longvecl_idx { def NAME : sve_bfloat_matmul_longvecl_idx; - def : SVE_4_Op_Imm_Pat(NAME)>; + def : SVE_4_Op_Imm_Pat(NAME)>; } class sve_bfloat_convert @@ -8784,7 +8784,7 @@ class sve2p1_two_way_dot_vv // SVE two-way dot product (indexed) class sve2p1_two_way_dot_vvi - : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$i2), + : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$i2), mnemonic, "\t$Zda, $Zn, $Zm$i2", "", []>, Sched<[]> { bits<5> Zda; diff --git a/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll b/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll index c21a6b7f1539..234151aa8c36 100644 --- a/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll +++ b/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll @@ -160,6 +160,33 @@ define @get_tuple2_nxv8i32_elt1( %tuple) { ret %ext } +; bfdot +define @bfdot_lane( %a, %b, %c) nounwind { +; CHECK-LABEL: @bfdot_lane +; CHECK: %out = call @llvm.aarch64.sve.bfdot.lane.v2( %a, %b, %c, i32 0) +; CHECK-NEXT: ret %out + %out = call @llvm.aarch64.sve.bfdot.lane( %a, %b, %c, i64 0) + ret %out +} + +; bfmlalb +define @bfmlalb_lane( %a, %b, %c) nounwind { +; CHECK-LABEL: @bfmlalb_lane +; CHECK: %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 0) +; CHECK-NEXT: ret %out + %out = call @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 0) + ret %out +} + +; bfmlalt +define @bfmlalt_lane( %a, %b, %c) nounwind { +; CHECK-LABEL: @bfmlalt_lane +; CHECK: %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 0) +; CHECK-NEXT: ret %out + %out = call @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 0) + ret %out +} + declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) declare @llvm.aarch64.sve.tuple.create2.nxv32i8(, ) declare @llvm.aarch64.sve.tuple.create2(, ) @@ -168,3 +195,6 @@ declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8() declare @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(, i32, ) declare @llvm.aarch64.sve.tuple.get.nxv8i32(, i32) +declare @llvm.aarch64.sve.bfdot.lane(, , , i64) +declare @llvm.aarch64.sve.bfmlalb.lane(, , , i64) +declare @llvm.aarch64.sve.bfmlalt.lane(, , , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll index a3989a97dc98..7d1e63e23a2c 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll @@ -19,7 +19,7 @@ define @bfdot_lane_0_f32( %a, @llvm.aarch64.sve.bfdot.lane( %a, %b, %c, i64 0) + %out = call @llvm.aarch64.sve.bfdot.lane.v2( %a, %b, %c, i32 0) ret %out } @@ -28,7 +28,7 @@ define @bfdot_lane_1_f32( %a, @llvm.aarch64.sve.bfdot.lane( %a, %b, %c, i64 1) + %out = call @llvm.aarch64.sve.bfdot.lane.v2( %a, %b, %c, i32 1) ret %out } @@ -37,7 +37,7 @@ define @bfdot_lane_2_f32( %a, @llvm.aarch64.sve.bfdot.lane( %a, %b, %c, i64 2) + %out = call @llvm.aarch64.sve.bfdot.lane.v2( %a, %b, %c, i32 2) ret %out } @@ -46,7 +46,7 @@ define @bfdot_lane_3_f32( %a, @llvm.aarch64.sve.bfdot.lane( %a, %b, %c, i64 3) + %out = call @llvm.aarch64.sve.bfdot.lane.v2( %a, %b, %c, i32 3) ret %out } @@ -68,7 +68,7 @@ define @bfmlalb_lane_0_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 0) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 0) ret %out } @@ -77,7 +77,7 @@ define @bfmlalb_lane_1_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 1) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 1) ret %out } @@ -86,7 +86,7 @@ define @bfmlalb_lane_2_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 2) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 2) ret %out } @@ -95,7 +95,7 @@ define @bfmlalb_lane_3_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 3) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 3) ret %out } @@ -104,7 +104,7 @@ define @bfmlalb_lane_4_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 4) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 4) ret %out } @@ -113,7 +113,7 @@ define @bfmlalb_lane_5_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 5) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 5) ret %out } @@ -122,7 +122,7 @@ define @bfmlalb_lane_6_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 6) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 6) ret %out } @@ -131,7 +131,7 @@ define @bfmlalb_lane_7_f32( %a, @llvm.aarch64.sve.bfmlalb.lane( %a, %b, %c, i64 7) + %out = call @llvm.aarch64.sve.bfmlalb.lane.v2( %a, %b, %c, i32 7) ret %out } @@ -153,7 +153,7 @@ define @bfmlalt_lane_0_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 0) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 0) ret %out } @@ -162,7 +162,7 @@ define @bfmlalt_lane_1_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 1) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 1) ret %out } @@ -171,7 +171,7 @@ define @bfmlalt_lane_2_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 2) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 2) ret %out } @@ -180,7 +180,7 @@ define @bfmlalt_lane_3_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 3) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 3) ret %out } @@ -189,7 +189,7 @@ define @bfmlalt_lane_4_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 4) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 4) ret %out } @@ -198,7 +198,7 @@ define @bfmlalt_lane_5_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 5) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 5) ret %out } @@ -207,7 +207,7 @@ define @bfmlalt_lane_6_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 6) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 6) ret %out } @@ -216,7 +216,7 @@ define @bfmlalt_lane_7_f32( %a, @llvm.aarch64.sve.bfmlalt.lane( %a, %b, %c, i64 7) + %out = call @llvm.aarch64.sve.bfmlalt.lane.v2( %a, %b, %c, i32 7) ret %out } @@ -260,11 +260,11 @@ define @fcvtnt_bf16_f32( %a, @llvm.aarch64.sve.bfdot(, , ) -declare @llvm.aarch64.sve.bfdot.lane(, , , i64) +declare @llvm.aarch64.sve.bfdot.lane.v2(, , , i32) declare @llvm.aarch64.sve.bfmlalb(, , ) -declare @llvm.aarch64.sve.bfmlalb.lane(, , , i64) +declare @llvm.aarch64.sve.bfmlalb.lane.v2(, , , i32) declare @llvm.aarch64.sve.bfmlalt(, , ) -declare @llvm.aarch64.sve.bfmlalt.lane(, , , i64) +declare @llvm.aarch64.sve.bfmlalt.lane.v2(, , , i32) declare @llvm.aarch64.sve.bfmmla(, , ) declare @llvm.aarch64.sve.fcvt.bf16f32(, , ) declare @llvm.aarch64.sve.fcvtnt.bf16f32(, , )