From ce86ff105b506aa0f150f676f59ee43abc00a213 Mon Sep 17 00:00:00 2001 From: paperchalice Date: Tue, 29 Jul 2025 12:11:52 +0800 Subject: [PATCH] [GlobalISel] Remove `UnsafeFPMath` references (#146319) This is the GlobalISel part to remove `UnsafeFPMath` flag in CodeGen pipeline. --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- .../combine-fma-add-mul-post-legalize.mir | 2772 ++++++++++------- .../combine-fma-add-mul-pre-legalize.mir | 2496 ++++++++------- .../AMDGPU/GlobalISel/combine-fma-add-mul.ll | 893 +++++- .../GlobalISel/combine-fma-unmerge-values.mir | 40 +- .../AMDGPU/GlobalISel/legalize-fptrunc.mir | 121 +- llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 865 ++++- llvm/test/CodeGen/AMDGPU/fptrunc.ll | 1181 +++++-- 9 files changed, 5618 insertions(+), 2758 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index e8f513ad5a7a..e84ba91c47c8 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5949,8 +5949,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, const TargetOptions &Options = MF->getTarget().Options; LLT DstType = MRI.getType(MI.getOperand(0).getReg()); - if (CanReassociate && - !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc))) + if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc)) return false; // Floating-point multiply-add with intermediate rounding. @@ -5962,8 +5961,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, if (!HasFMAD && !HasFMA) return false; - AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD; + AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD; // If the addition is not contractable, do not combine. 
if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract)) return false; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ed7b07f7d936..538a763f9f48 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8004,7 +8004,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly. return UnableToLegalize; - if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) { + if (MI.getFlag(MachineInstr::FmAfn)) { unsigned Flags = MI.getFlags(); auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags); MIRBuilder.buildFPTrunc(Dst, Src32, Flags); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir index 789385dcbae8..b770d432c994 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -1,12 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX9-UNSAFE %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s -# RUN: 
llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX10-UNSAFE %s --- name: test_f32_add_mul @@ -24,15 +20,7 @@ body: | ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -43,15 +31,7 @@ body: | ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; 
GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -62,15 +42,7 @@ body: | ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -81,15 +53,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -99,6 +62,60 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
+--- +name: test_f32_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY 
[[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = contract G_FMUL %0, %1 + %5:_(s32) = contract G_FADD %4, %2 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + --- name: test_f32_add_mul_rhs body: | @@ -115,15 +132,7 @@ body: | ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -134,15 +143,7 @@ body: | ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: 
test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -153,15 +154,7 @@ body: | ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -172,15 +165,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -190,6 +174,60 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
+--- +name: test_f32_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; 
GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = contract G_FMUL %0, %1 + %5:_(s32) = contract G_FADD %2, %4 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + --- name: test_add_mul_multiple_defs_z body: | @@ -209,18 +247,7 @@ body: | ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-DENORM-NEXT: {{ $}} @@ -234,18 +261,7 @@ body: | ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; 
GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-LABEL: name: test_add_mul_multiple_defs_z ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -259,18 +275,7 @@ body: | ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX10-DENORM: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-DENORM-NEXT: {{ $}} @@ -284,18 +289,6 @@ body: | ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %4:_(s32) = COPY $vgpr2 @@ -309,6 +302,76 @@ body: | $vgpr0 = COPY %10(s32) ... 
+--- +name: test_add_mul_multiple_defs_z_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GFX9-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
$vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1) + %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + %8:_(s32) = COPY %13(s32) + %10:_(s32) = contract G_FADD %6, %8 + $vgpr0 = COPY %10(s32) +... 
+ --- name: test_add_mul_rhs_multiple_defs_z body: | @@ -328,18 +391,7 @@ body: | ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-DENORM-NEXT: {{ $}} @@ -353,18 +405,7 @@ body: | ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES 
[[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -378,18 +419,7 @@ body: | ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-DENORM-NEXT: {{ $}} @@ -403,18 +433,6 @@ body: | ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY 
[[FADD]](s32) - ; GFX10-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %4:_(s32) = COPY $vgpr2 @@ -428,6 +446,76 @@ body: | $vgpr0 = COPY %10(s32) ... +--- +name: test_add_mul_rhs_multiple_defs_z_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GFX9-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; 
GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1) + %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + %8:_(s32) = COPY %13(s32) + %10:_(s32) = contract G_FADD %8, %6 + $vgpr0 = COPY %10(s32) +... + --- name: test_half_add_mul body: | @@ -448,19 +536,7 @@ body: | ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -475,19 +551,7 @@ body: | ; GFX9-DENORM-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_half_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -502,19 +566,7 @@ body: | ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -529,19 +581,6 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -555,6 +594,80 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
+--- +name: test_half_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = contract G_FMUL %0, %1 + %8:_(s16) = contract G_FADD %7, %2 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... 
+ --- name: test_half_add_mul_rhs body: | @@ -575,19 +688,7 @@ body: | ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -602,19 +703,7 @@ body: | ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: 
[[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -629,19 +718,7 @@ body: | ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -656,19 +733,6 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -682,6 +746,80 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... +--- +name: test_half_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = contract G_FMUL %0, %1 + %8:_(s16) = contract G_FADD %2, %7 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + --- name: test_double_add_mul body: | @@ -706,23 +844,7 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -741,23 +863,7 @@ body: | ; 
GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_double_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -776,23 +882,7 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -811,23 +901,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -845,6 +918,101 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... +--- +name: test_double_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit 
$vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = contract G_FMUL %0, %1 + %11:_(s64) = contract G_FADD %10, %2 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ + --- name: test_double_add_mul_rhs body: | @@ -869,23 +1037,7 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -904,23 +1056,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -939,23 +1075,7 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -974,23 +1094,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: 
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -1008,6 +1111,100 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... +--- +name: test_double_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; 
GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = contract G_FMUL %0, %1 + %11:_(s64) = contract G_FADD %2, %10 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ --- name: test_4xfloat_add_mul body: | @@ -1040,32 +1237,7 @@ body: | ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x 
s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-DENORM-NEXT: {{ $}} @@ -1092,32 +1264,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; 
GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-NEXT: {{ $}} @@ -1144,32 +1291,7 @@ body: | ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; 
GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-DENORM-NEXT: {{ $}} @@ -1196,32 +1318,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1247,6 +1343,144 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
+--- +name: test_4xfloat_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + + ; GFX9-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3 + ; + ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-DENORM-LABEL: name: 
test_4xfloat_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %16:_(<4 x s32>) = contract G_FMUL %0, %1 + %17:_(<4 x s32>) = contract G_FADD %16, %2 + %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) + $vgpr0 = COPY %19(s32) + $vgpr1 = COPY %20(s32) + $vgpr2 = COPY %21(s32) + $vgpr3 = COPY %22(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +... + --- name: test_3xfloat_add_mul_rhs body: | @@ -1275,28 +1509,7 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-DENORM-NEXT: {{ $}} @@ -1319,28 +1532,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY 
$vgpr5 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} @@ -1363,28 +1555,7 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-DENORM-NEXT: {{ $}} @@ -1407,28 +1578,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1450,6 +1599,124 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
+--- +name: test_3xfloat_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + + ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; 
GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %13:_(<3 x s32>) = contract G_FMUL %0, %1 + %14:_(<3 x s32>) = contract G_FADD %2, %13 + %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +... 
+ --- name: test_4xhalf_add_mul body: | @@ -1474,24 +1741,7 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1510,24 +1760,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) 
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1546,24 +1779,7 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; 
GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1582,24 +1798,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1617,6 +1815,105 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
+--- +name: test_4xhalf_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_4xhalf_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) 
= COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_4xhalf_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; 
GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = 
G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %10:_(<4 x s16>) = contract G_FMUL %0, %1 + %11:_(<4 x s16>) = contract G_FADD %10, %2 + %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) + $vgpr0 = COPY %13(<2 x s16>) + $vgpr1 = COPY %14(<2 x s16>) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + + --- name: test_3xhalf_add_mul_rhs body: | @@ -1648,31 +1945,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x 
s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1698,31 +1970,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1748,31 +1995,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: 
[[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, 
$vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1797,31 +2019,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; 
GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1845,6 +2042,134 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... +--- +name: test_3xhalf_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), 
[[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 
x s16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x 
s16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]] + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x 
s16>) + ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %10:_(<2 x s16>) = G_IMPLICIT_DEF + %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>) + %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>) + %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) + %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) + %17:_(<3 x s16>) = 
contract G_FMUL %0, %1 + %18:_(<3 x s16>) = contract G_FADD %2, %17 + %22:_(<3 x s16>) = G_IMPLICIT_DEF + %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>) + %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) + $vgpr0 = COPY %20(<2 x s16>) + $vgpr1 = COPY %21(<2 x s16>) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + --- name: test_4xdouble_add_mul body: | @@ -1905,60 +2230,7 @@ body: | ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = 
G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), 
[[COPY23]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-DENORM-NEXT: {{ $}} @@ -2013,60 +2285,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, 
$vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), 
[[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - 
; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-NEXT: {{ $}} @@ -2121,60 +2340,7 @@ body: | ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), 
[[COPY21]](s32) - ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-DENORM-NEXT: {{ $}} @@ -2229,60 +2395,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; 
GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: 
[[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: 
$vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2336,6 +2448,284 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... +--- +name: test_4xdouble_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + + ; GFX9-LABEL: name: test_4xdouble_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), 
[[COPY7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; 
GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-DENORM-NEXT: 
[[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX10-LABEL: name: test_4xdouble_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; 
GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-DENORM-NEXT: 
[[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 
+ %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %22:_(s32) = COPY $vgpr18 + %23:_(s32) = COPY $vgpr19 + %24:_(s32) = COPY $vgpr20 + %25:_(s32) = COPY $vgpr21 + %26:_(s32) = COPY $vgpr22 + %27:_(s32) = COPY $vgpr23 + %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) + %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) + %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) + %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %40:_(<4 x s64>) = contract G_FMUL %0, %1 + %41:_(<4 x s64>) = contract G_FADD %40, %2 + %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) + $vgpr0 = COPY %43(s32) + $vgpr1 = COPY %44(s32) + $vgpr2 = COPY %45(s32) + $vgpr3 = COPY %46(s32) + $vgpr4 = COPY %47(s32) + $vgpr5 = COPY %48(s32) + $vgpr6 = COPY %49(s32) + $vgpr7 = COPY %50(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +... 
+ --- name: test_3xdouble_add_mul_rhs body: | @@ -2385,49 +2775,7 @@ body: | ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, 
$vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-DENORM-NEXT: {{ $}} @@ -2471,49 +2819,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; 
GFX10-NEXT: {{ $}} @@ -2557,49 +2863,7 @@ body: | ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-DENORM-NEXT: {{ $}} @@ -2643,49 +2907,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; 
GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2727,3 +2948,226 @@ body: | $vgpr5 = COPY %39(s32) S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... + +--- +name: test_3xdouble_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + + ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; 
GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) 
= COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY 
[[UV5]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; 
GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), 
%23(s64), %24(s64) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %31:_(<3 x s64>) = contract G_FMUL %0, %1 + %32:_(<3 x s64>) = contract G_FADD %2, %31 + %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) + $vgpr0 = COPY %34(s32) + $vgpr1 = COPY %35(s32) + $vgpr2 = COPY %36(s32) + $vgpr3 = COPY %37(s32) + $vgpr4 = COPY %38(s32) + $vgpr5 = COPY %39(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir index 42e53bedb8d8..8f9fc67ab76d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir @@ -1,12 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX9-UNSAFE %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX10-UNSAFE %s --- name: test_f32_add_mul @@ -25,16 +21,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; 
GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -46,16 +32,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -67,16 +43,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; 
GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -87,16 +53,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -106,6 +62,60 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
+--- +name: test_f32_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY 
[[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = reassoc contract G_FMUL %0, %1 + %5:_(s32) = reassoc contract G_FADD %4, %2 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + --- name: test_f32_add_mul_rhs body: | @@ -123,16 +133,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -144,16 +144,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -165,16 +155,6 @@ body: | ; GFX10-NEXT: $vgpr0 = 
COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -185,16 +165,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -204,6 +174,60 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
+--- +name: test_f32_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; 
GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = reassoc contract G_FMUL %0, %1 + %5:_(s32) = reassoc contract G_FADD %2, %4 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + --- name: test_half_add_mul body: | @@ -225,20 +249,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -254,20 +264,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_half_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -283,20 +279,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -311,20 +293,6 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, 
$vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -338,6 +306,81 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... +--- +name: test_half_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; 
GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + 
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = reassoc contract G_FMUL %0, %1 + %8:_(s16) = reassoc contract G_FADD %7, %2 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + + --- name: test_half_add_mul_rhs body: | @@ -359,20 +402,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -388,20 +417,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -417,20 +432,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -445,20 +446,84 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY 
[[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = reassoc G_FMUL %0, %1 + %8:_(s16) = reassoc G_FADD %2, %7 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + +--- +name: test_half_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; 
GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; 
GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -497,24 +562,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; 
GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -534,24 +581,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_double_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -571,24 +600,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -607,24 +618,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - 
; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -642,6 +635,100 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... +--- +name: test_double_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + 
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = reassoc contract G_FMUL %0, %1 + %11:_(s64) = reassoc contract G_FADD %10, %2 + 
%13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + --- name: test_double_add_mul_rhs body: | @@ -667,24 +754,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -704,24 +773,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -741,24 +792,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) 
= COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -777,24 +810,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = 
COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -812,6 +827,100 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... +--- +name: test_double_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; 
GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = reassoc contract G_FMUL %0, %1 + %11:_(s64) = reassoc contract G_FADD %2, %10 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ --- name: test_4xfloat_add_mul body: | @@ -845,32 +954,6 @@ body: | ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; - ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; 
GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-DENORM-NEXT: {{ $}} @@ -898,32 +981,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; - ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; 
GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} @@ -951,32 +1008,6 @@ body: | ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; - ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-DENORM-NEXT: {{ $}} @@ -1003,32 +1034,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; - ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1054,6 +1059,140 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... 
+--- +name: test_4xfloat_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + + ; GFX9-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX9-DENORM-LABEL: name: 
test_4xfloat_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + 
%11:_(s32) = COPY $vgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %16:_(<4 x s32>) = reassoc contract G_FMUL %0, %1 + %17:_(<4 x s32>) = reassoc contract G_FADD %16, %2 + %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) + $vgpr0 = COPY %19(s32) + $vgpr1 = COPY %20(s32) + $vgpr2 = COPY %21(s32) + $vgpr3 = COPY %22(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +... + --- name: test_3xfloat_add_mul_rhs body: | @@ -1083,28 +1222,6 @@ body: | ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), 
[[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-DENORM-NEXT: {{ $}} @@ -1128,28 +1245,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), 
[[COPY8]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} @@ -1173,28 +1268,6 @@ body: | ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; 
GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-DENORM-NEXT: {{ $}} @@ -1217,28 +1290,124 @@ body: | ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 + %14:_(<3 x s32>) = reassoc G_FADD %2, %13 + %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +... 
+ +--- +name: test_3xfloat_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + + ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + 
; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1285,24 +1454,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; 
GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1322,24 +1473,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1359,24 +1492,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX10-CONTRACT: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1395,24 +1510,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) 
= COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1430,6 +1527,100 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
+--- +name: test_4xhalf_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_4xhalf_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_4xhalf_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, 
implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %10:_(<4 x s16>) = reassoc contract G_FMUL %0, %1 + %11:_(<4 x s16>) = reassoc contract G_FADD %10, %2 + %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) + $vgpr0 
= COPY %13(<2 x s16>) + $vgpr1 = COPY %14(<2 x s16>) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + --- name: test_3xhalf_add_mul_rhs body: | @@ -1461,30 +1652,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: 
[[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1510,30 +1677,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: 
[[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1559,30 +1702,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; 
GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1607,30 +1726,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: 
[[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1654,6 +1749,130 @@ body: | 
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... +--- +name: test_3xhalf_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x 
s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-DENORM-NEXT: 
[[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %10:_(<2 x s16>) = G_IMPLICIT_DEF + %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>) + %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>) + %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) + %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) + %17:_(<3 x s16>) = reassoc contract G_FMUL %0, %1 + %18:_(<3 x s16>) = reassoc contract G_FADD %2, %17 + %22:_(<3 x s16>) = G_IMPLICIT_DEF + %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>) + %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) + $vgpr0 = COPY %20(<2 x s16>) + $vgpr1 = COPY %21(<2 x s16>) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ --- name: test_4xdouble_add_mul body: | @@ -1715,60 +1934,6 @@ body: | ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; - ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) 
= COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-DENORM-NEXT: {{ $}} @@ -1824,60 +1989,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; - ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-UNSAFE-NEXT: 
[[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-NEXT: {{ $}} @@ -1933,60 +2044,6 @@ body: | ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; - ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; 
GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-DENORM-NEXT: {{ $}} @@ -2041,60 +2098,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; - ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) 
= COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; 
GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2148,6 +2151,280 @@ body: | S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... 
+--- +name: test_4xdouble_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + + ; GFX9-LABEL: name: test_4xdouble_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), 
[[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-NEXT: $vgpr7 
= COPY [[UV7]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; 
GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX10-LABEL: name: test_4xdouble_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; 
GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], 
[[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %22:_(s32) = COPY $vgpr18 + %23:_(s32) = COPY $vgpr19 + %24:_(s32) = COPY $vgpr20 + %25:_(s32) 
= COPY $vgpr21 + %26:_(s32) = COPY $vgpr22 + %27:_(s32) = COPY $vgpr23 + %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) + %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) + %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) + %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %40:_(<4 x s64>) = reassoc contract G_FMUL %0, %1 + %41:_(<4 x s64>) = reassoc contract G_FADD %40, %2 + %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) + $vgpr0 = COPY %43(s32) + $vgpr1 = COPY %44(s32) + $vgpr2 = COPY %45(s32) + $vgpr3 = COPY %46(s32) + $vgpr4 = COPY %47(s32) + $vgpr5 = COPY %48(s32) + $vgpr6 = COPY %49(s32) + $vgpr7 = COPY %50(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +... + --- name: test_3xdouble_add_mul_rhs body: | @@ -2198,49 +2475,6 @@ body: | ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; - ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; 
GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-DENORM-NEXT: {{ $}} @@ -2285,49 +2519,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; - ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-NEXT: {{ $}} @@ -2372,49 +2563,6 @@ body: | ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; - ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX10-CONTRACT-NEXT: 
$vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-DENORM-NEXT: {{ $}} @@ -2458,49 +2606,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; - ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2542,3 +2647,222 @@ body: | $vgpr5 = COPY %39(s32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... + +--- +name: test_3xdouble_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + + ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = 
COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX9-DENORM-LABEL: name: 
test_3xdouble_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), 
[[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: 
[[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = 
COPY $vgpr5 + %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %31:_(<3 x s64>) = reassoc contract G_FMUL %0, %1 + %32:_(<3 x s64>) = reassoc contract G_FADD %2, %31 + %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) + $vgpr0 = COPY %34(s32) + $vgpr1 = COPY %35(s32) + $vgpr2 = COPY %36(s32) + $vgpr3 = COPY %37(s32) + $vgpr4 = COPY %38(s32) + $vgpr5 = COPY %39(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll index 24dd53574f22..3f6e3d81c52a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -2,11 +2,9 @@ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX9-UNSAFE %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX10-UNSAFE %s define float @test_f32_add_mul(float %x, float %y, float %z) { ; GFX9-LABEL: test_f32_add_mul: @@ -28,12 +26,6 @@ define float @test_f32_add_mul(float %x, float %y, float %z) { ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_f32_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_f32_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -52,7 +44,6 @@ define float @test_f32_add_mul(float %x, float %y, float %z) { ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_f32_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -64,6 +55,58 @@ define float @test_f32_add_mul(float %x, float %y, float %z) { ret float %b } +define float @test_f32_add_mul_contract(float %x, float %y, float %z) { +; GFX9-LABEL: test_f32_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_f32_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_f32_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_f32_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_f32_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_f32_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_f32_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: 
s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_f32_add_mul_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract float %x, %y + %b = fadd contract float %a, %z + ret float %b +} + define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ; GFX9-LABEL: test_f32_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -84,12 +127,6 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_f32_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -108,7 +145,6 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -120,6 +156,58 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ret float %b } +define float @test_f32_add_mul_rhs_contract(float %x, float %y, float %z) { +; GFX9-LABEL: test_f32_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_f32_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; 
GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_f32_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_f32_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_f32_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract float %x, %y + %b = fadd contract float %z, %a + ret float %b +} + define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry @@ -147,14 +235,6 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) ; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z: -; GFX9-UNSAFE: ; %bb.0: ; 
%.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_add_mul_multiple_defs_z: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -181,7 +261,6 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) ; GFX10-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,6 +277,81 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) ret float %b } +define float @test_add_mul_multiple_defs_z_contract(float %x, float %y, ptr addrspace(1) %vec_ptr) { +; GFX9-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) +; GFX9-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 +; GFX9-DENORM-NEXT: 
v_mov_b32_e32 v0, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) +; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) +; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = 
fmul contract float %x, %y + %vec = load <2 x float>, ptr addrspace(1) %vec_ptr + %z = extractelement <2 x float> %vec, i64 1 + %b = fadd contract float %a, %z + ret float %b +} + define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry @@ -225,14 +379,6 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace ; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -259,7 +405,6 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace ; GFX10-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -296,12 +441,6 @@ define half @test_half_add_mul(half %x, half %y, half %z) { ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_half_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_half_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -321,7 +460,6 @@ define half @test_half_add_mul(half %x, half %y, half %z) 
{ ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_half_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -333,6 +471,59 @@ define half @test_half_add_mul(half %x, half %y, half %z) { ret half %b } +define half @test_half_add_mul_contract(half %x, half %y, half %z) { +; GFX9-LABEL: test_half_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_half_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_half_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_half_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_half_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_half_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_half_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_half_add_mul_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract half %x, %y + %b = fadd contract half %a, %z + ret half %b +} + define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ; GFX9-LABEL: test_half_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -353,12 +544,6 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_half_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -378,7 +563,6 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -390,6 +574,59 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ret half %b } +define half @test_half_add_mul_rhs_contract(half %x, half %y, half %z) { +; GFX9-LABEL: test_half_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_half_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_half_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_half_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_half_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract half %x, %y + %b = fadd contract half %z, %a + ret half %b +} + define double @test_double_add_mul(double %x, double %y, double %z) { ; GFX9-LABEL: test_double_add_mul: ; GFX9: ; %bb.0: ; %.entry @@ -411,12 +648,6 @@ define double @test_double_add_mul(double %x, double %y, double %z) { ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX9-DENORM-NEXT: s_setpc_b64 
s[30:31] ; -; GFX9-UNSAFE-LABEL: test_double_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_double_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -436,15 +667,61 @@ define double @test_double_add_mul(double %x, double %y, double %z) { ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +} + +define double @test_double_add_mul_contract(double %x, double %y, double %z) { +; GFX9-LABEL: test_double_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX9-CONTRACT-LABEL: test_double_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_double_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_double_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_double_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-CONTRACT-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_double_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_double_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; GFX10-UNSAFE-LABEL: test_double_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul double %x, %y - %b = fadd double %a, %z + %a = fmul contract double %x, %y + %b = fadd contract double %a, %z ret double %b } @@ -469,12 +746,6 @@ define double @test_double_add_mul_rhs(double %x, double %y, double %z) { ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_double_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -494,15 +765,61 @@ define double @test_double_add_mul_rhs(double %x, double %y, double %z) { ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul double %x, %y + %b = fadd double %z, %a + ret double %b +} + +define double @test_double_add_mul_rhs_contract(double %x, double %y, double %z) { +; GFX9-LABEL: test_double_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs: +; GFX9-CONTRACT-LABEL: test_double_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_double_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_double_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_double_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul 
double %x, %y - %b = fadd double %z, %a + %a = fmul contract double %x, %y + %b = fadd contract double %z, %a ret double %b } @@ -538,15 +855,6 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl ; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 -; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9 -; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10 -; GFX9-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_4xfloat_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -577,8 +885,75 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 ; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul <4 x float> %x, %y + %b = fadd <4 x float> %a, %z + ret <4 x float> %b +} + +define <4 x float> @test_4xfloat_add_mul_contract(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; GFX9-LABEL: test_4xfloat_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul: +; GFX9-CONTRACT-LABEL: test_4xfloat_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-CONTRACT-NEXT: s_setpc_b64 
s[30:31] +; +; GFX9-DENORM-LABEL: test_4xfloat_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8 +; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9 +; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 +; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xfloat_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xfloat_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-DENORM-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-DENORM-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 @@ -587,8 +962,8 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl ; GFX10-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul <4 x float> %x, %y - %b = fadd <4 x float> %a, %z + %a = fmul contract <4 x float> %x, %y + %b = fadd contract <4 x float> %a, %z ret <4 x float> %b } @@ -620,14 +995,6 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 ; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 -; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7 -; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_3xfloat_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -654,8 +1021,68 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul <3 x float> %x, %y + %b = fadd <3 x float> %z, %a + ret <3 x float> %b +} + +define <3 x float> @test_3xfloat_add_mul_rhs_contract(<3 x float> %x, <3 x float> %y, <3 x float> %z) { +; GFX9-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs: +; GFX9-CONTRACT-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6 +; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 +; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-DENORM-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 @@ -663,8 +1090,8 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 ; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul <3 x float> %x, %y - %b = fadd <3 x float> %z, %a + %a = fmul contract <3 x float> %x, %y + %b = fadd contract <3 x float> %z, %a ret <3 x float> %b } @@ -694,13 +1121,6 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> ; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_4xhalf_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -725,7 +1145,6 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> ; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -738,6 +1157,70 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> ret <4 x half> %b } +define <4 x half> @test_4xhalf_add_mul_contract(<4 x half> %x, <4 x half> %y, <4 x half> %z) { +; GFX9-LABEL: test_4xhalf_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xhalf_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xhalf_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xhalf_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xhalf_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-UNSAFE-NEXT: 
v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract <4 x half> %x, %y + %b = fadd contract <4 x half> %a, %z + ret <4 x half> %b +} + define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x half> %z) { ; GFX9-LABEL: test_3xhalf_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -764,13 +1247,6 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_3xhalf_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -795,16 +1271,73 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul <3 x half> %x, %y + %b = fadd <3 x half> %z, %a + ret <3 x half> %b +} + +define <3 x half> @test_3xhalf_add_mul_rhs_contract(<3 x half> %x, <3 x half> %y, <3 x half> %z) { +; GFX9-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: +; GFX9-CONTRACT-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: 
test_3xhalf_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 +; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul <3 x half> %x, %y - %b = fadd <3 x half> %z, %a + %a = fmul contract <3 x half> %x, 
%y + %b = fadd contract <3 x half> %z, %a ret <3 x half> %b } @@ -844,15 +1377,6 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_4xdouble_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -887,7 +1411,14 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], v[20:21] ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; +; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; GFX10-UNSAFE-LABEL: test_4xdouble_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -902,6 +1433,66 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 ret <4 x double> %b } +define <4 x double> @test_4xdouble_add_mul_contract(<4 x double> %x, <4 x double> %y, <4 x double> %z) { +; GFX9-LABEL: test_4xdouble_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xdouble_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xdouble_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xdouble_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], 
v[12:13], v[20:21] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xdouble_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX10-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract <4 x double> %x, %y + %b = fadd contract <4 x double> %a, %z + ret <4 x double> %b +} + define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, <3 x double> %z) { ; GFX9-LABEL: test_3xdouble_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -933,14 +1524,6 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_3xdouble_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -970,7 +1553,13 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[14:15], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; +; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; GFX10-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -983,3 +1572,57 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, %b = fadd <3 x double> %z, %a ret <3 x double> %b } + +define <3 x double> @test_3xdouble_add_mul_rhs_contract(<3 x double> %x, <3 x double> %y, <3 x double> %z) { +; GFX9-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; 
GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract <3 x double> %x, %y + %b = fadd contract <3 x double> %z, %a + ret <3 x double> %b +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir index 2845a632a84b..d9ac9a71ebb2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir @@ -24,8 +24,8 @@ body: | %ptr:_(p1) = COPY $vgpr2_vgpr3 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FADD %6, %el1 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FADD %6, %el1 $vgpr0 = COPY %7(s32) ... 
@@ -54,8 +54,8 @@ body: | %ptr:_(p1) = COPY $vgpr2_vgpr3 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FADD %el1, %6 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FADD %el1, %6 $vgpr0 = COPY %7(s32) ... @@ -233,10 +233,10 @@ body: | %7:_(s16) = G_TRUNC %6(s32) %8:_(s32) = COPY $vgpr5 %9:_(s16) = G_TRUNC %8(s32) - %10:_(s16) = G_FMUL %7, %9 + %10:_(s16) = contract G_FMUL %7, %9 %11:_(s32) = G_FPEXT %10(s16) %12:_(s32) = G_FMA %0, %1, %11 - %13:_(s32) = G_FADD %12, %el1 + %13:_(s32) = contract G_FADD %12, %el1 $vgpr0 = COPY %13(s32) ... @@ -282,11 +282,11 @@ body: | %9:_(s16) = G_TRUNC %8(s32) %10:_(s32) = COPY $vgpr5 %11:_(s16) = G_TRUNC %10(s32) - %12:_(s16) = G_FMUL %9, %11 - %13:_(s16) = G_FMUL %1, %3 - %14:_(s16) = G_FADD %13, %12 + %12:_(s16) = contract G_FMUL %9, %11 + %13:_(s16) = contract G_FMUL %1, %3 + %14:_(s16) = contract G_FADD %13, %12 %15:_(s32) = G_FPEXT %14(s16) - %16:_(s32) = G_FADD %15, %el1 + %16:_(s32) = contract G_FADD %15, %el1 $vgpr0 = COPY %16(s32) ... @@ -326,10 +326,10 @@ body: | %7:_(s16) = G_TRUNC %6(s32) %8:_(s32) = COPY $vgpr5 %9:_(s16) = G_TRUNC %8(s32) - %10:_(s16) = G_FMUL %7, %9 + %10:_(s16) = contract G_FMUL %7, %9 %11:_(s32) = G_FPEXT %10(s16) %12:_(s32) = G_FMA %4, %5, %11 - %13:_(s32) = G_FADD %el1, %12 + %13:_(s32) = contract G_FADD %el1, %12 $vgpr0 = COPY %13(s32) ... @@ -375,11 +375,11 @@ body: | %9:_(s16) = G_TRUNC %8(s32) %10:_(s32) = COPY $vgpr5 %11:_(s16) = G_TRUNC %10(s32) - %12:_(s16) = G_FMUL %9, %11 - %13:_(s16) = G_FMUL %5, %7 - %14:_(s16) = G_FADD %13, %12 + %12:_(s16) = contract G_FMUL %9, %11 + %13:_(s16) = contract G_FMUL %5, %7 + %14:_(s16) = contract G_FADD %13, %12 %15:_(s32) = G_FPEXT %14(s16) - %16:_(s32) = G_FADD %el1, %15 + %16:_(s32) = contract G_FADD %el1, %15 $vgpr0 = COPY %16(s32) ... 
@@ -409,8 +409,8 @@ body: | %ptr:_(p1) = COPY $vgpr0_vgpr1 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FSUB %6, %el1 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FSUB %6, %el1 $vgpr0 = COPY %7(s32) ... @@ -440,7 +440,7 @@ body: | %ptr:_(p1) = COPY $vgpr2_vgpr3 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FSUB %el1, %6 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FSUB %el1, %6 $vgpr0 = COPY %7(s32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir index f513de8b9c77..477ef327f57d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir @@ -385,117 +385,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 
511 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C5]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744 - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: 
[[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]] - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; CHECK-NEXT: 
[[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C5]] - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[UV4]] - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR8]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP8]](s1) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[ZEXT4]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR9]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[C8]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C9]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD2]] - ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[C6]] - ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[C11]] - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[C12]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR12]], [[SMIN1]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[SMIN1]](s32) - ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL3]](s32), [[OR12]] - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP10]](s1) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[ZEXT5]] - ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD2]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP11]](s1), [[OR13]], [[OR11]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C13]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT5]], [[C14]](s32) - ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND8]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP12]](s1) - ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND8]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP13]](s1) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT6]], [[ZEXT7]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[OR14]] - ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD2]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP14]](s1), [[C8]], [[ADD3]] - ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD2]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP15]](s1), [[OR10]], [[SELECT6]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C19]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C20]] - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SELECT7]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[OR7]], [[C21]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[OR15]], [[C21]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C19]](s32) - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV]](s64) + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC]](s32) + ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV1]](s64) + ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC2]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s16>) = afn G_FPTRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index 
00b9b2b216f0..57b485777624 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 < %s | FileCheck -check-prefixes=SI-GISEL %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global 
-denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope 
-check-prefixes=GFX11-GISEL-FAKE16 %s define amdgpu_kernel void @fptrunc_f32_to_f16( ; SI-SDAG-LABEL: fptrunc_f32_to_f16: @@ -457,9 +457,49 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 ; SI-GISEL-NEXT: s_mov_b32 s2, -1 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014 +; SI-GISEL-NEXT: s_lshr_b32 s6, s5, 8 +; SI-GISEL-NEXT: s_and_b32 s7, s5, 0x1ff +; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 +; SI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe +; SI-GISEL-NEXT: s_or_b32 s4, s7, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s4, s6, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9 +; SI-GISEL-NEXT: s_lshl_b32 s7, s3, 12 +; SI-GISEL-NEXT: s_sub_i32 s8, 1, s3 +; SI-GISEL-NEXT: s_or_b32 s9, s4, 0x1000 +; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00 +; SI-GISEL-NEXT: s_or_b32 s4, s4, s7 +; SI-GISEL-NEXT: s_max_i32 s7, s8, 0 +; SI-GISEL-NEXT: s_min_i32 s7, s7, 13 +; SI-GISEL-NEXT: s_lshr_b32 s8, s9, s7 +; SI-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; SI-GISEL-NEXT: s_cmp_lg_u32 s7, s9 +; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s7, s8, s7 +; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1 +; SI-GISEL-NEXT: s_cselect_b32 s4, s7, s4 +; SI-GISEL-NEXT: s_and_b32 s7, s4, 7 +; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; SI-GISEL-NEXT: s_cmp_eq_u32 s7, 3 +; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; SI-GISEL-NEXT: s_cmp_gt_i32 s7, 5 +; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s7, s8, s7 +; SI-GISEL-NEXT: s_add_i32 s4, s4, s7 +; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30 +; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f +; SI-GISEL-NEXT: s_cselect_b32 s3, s6, s4 +; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16 +; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s3 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -529,10 +569,50 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; VI-GISEL-NEXT: s_or_b32 s2, s6, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; VI-GISEL-NEXT: s_max_i32 s7, s7, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s2, s6 +; VI-GISEL-NEXT: s_min_i32 s7, s7, 13 +; VI-GISEL-NEXT: s_bitset1_b32 s2, 12 +; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s8, s2 +; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; VI-GISEL-NEXT: s_and_b32 s6, s2, 7 +; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s7, s6 +; VI-GISEL-NEXT: s_add_i32 s2, s2, s6 +; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; 
VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; VI-GISEL-NEXT: s_or_b32 s2, s3, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; VI-GISEL-NEXT: s_mov_b32 s2, -1 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-GISEL-NEXT: s_endpgm ; @@ -602,10 +682,50 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX9-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX9-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX9-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; GFX9-GISEL-NEXT: s_max_i32 s7, s7, 0 +; GFX9-GISEL-NEXT: s_or_b32 s6, s2, s6 +; GFX9-GISEL-NEXT: s_min_i32 s7, s7, 13 +; GFX9-GISEL-NEXT: s_bitset1_b32 s2, 12 +; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX9-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s2, s8, s2 +; GFX9-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; GFX9-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX9-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX9-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX9-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX9-GISEL-NEXT: 
s_cmp_gt_i32 s4, 30 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX9-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm ; @@ -675,8 +795,48 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX950-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX950-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX950-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX950-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX950-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX950-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; GFX950-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; GFX950-GISEL-NEXT: s_max_i32 s7, s7, 0 +; GFX950-GISEL-NEXT: s_or_b32 s6, s2, s6 +; GFX950-GISEL-NEXT: s_min_i32 s7, s7, 13 +; GFX950-GISEL-NEXT: s_bitset1_b32 s2, 12 +; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX950-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX950-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s2, s8, s2 +; GFX950-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; GFX950-GISEL-NEXT: 
s_and_b32 s6, s2, 7 +; GFX950-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX950-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX950-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX950-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 ; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -822,11 +982,54 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s7, s4, 12 +; 
GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s2, s7 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s8 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s9, s6 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s6, s2 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s2, 7 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s7, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s7, s6 +; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s2, s2, s6 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s5, s2 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2 ; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-TRUE16-NEXT: s_endpgm ; @@ -836,11 +1039,54 @@ define 
amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s7, s4, 12 +; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s2, s7 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s8 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s9, s6 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s6, s2 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s2, 7 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s7, 1, 0 +; 
GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s7, s6 +; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s2, s2, s6 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s5, s2 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, @@ -1644,13 +1890,94 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 ; SI-GISEL-NEXT: s_mov_b32 s2, -1 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014 +; SI-GISEL-NEXT: s_lshr_b32 s8, s5, 8 +; SI-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff +; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 +; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe +; SI-GISEL-NEXT: s_or_b32 s4, s9, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s4, s8, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 
s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; SI-GISEL-NEXT: s_lshl_b32 s8, s8, 9 +; SI-GISEL-NEXT: s_lshl_b32 s9, s3, 12 +; SI-GISEL-NEXT: s_sub_i32 s10, 1, s3 +; SI-GISEL-NEXT: s_or_b32 s11, s4, 0x1000 +; SI-GISEL-NEXT: s_or_b32 s8, s8, 0x7c00 +; SI-GISEL-NEXT: s_or_b32 s4, s4, s9 +; SI-GISEL-NEXT: s_max_i32 s9, s10, 0 +; SI-GISEL-NEXT: s_min_i32 s9, s9, 13 +; SI-GISEL-NEXT: s_lshr_b32 s10, s11, s9 +; SI-GISEL-NEXT: s_lshl_b32 s9, s10, s9 +; SI-GISEL-NEXT: s_cmp_lg_u32 s9, s11 +; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s9, s10, s9 +; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1 +; SI-GISEL-NEXT: s_cselect_b32 s4, s9, s4 +; SI-GISEL-NEXT: s_and_b32 s9, s4, 7 +; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; SI-GISEL-NEXT: s_cmp_eq_u32 s9, 3 +; SI-GISEL-NEXT: s_cselect_b32 s10, 1, 0 +; SI-GISEL-NEXT: s_cmp_gt_i32 s9, 5 +; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s9, s10, s9 +; SI-GISEL-NEXT: s_add_i32 s4, s4, s9 +; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30 +; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f +; SI-GISEL-NEXT: s_cselect_b32 s3, s8, s4 +; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16 +; SI-GISEL-NEXT: s_bfe_u32 s5, s7, 0xb0014 +; SI-GISEL-NEXT: s_lshr_b32 s8, s7, 8 +; SI-GISEL-NEXT: s_and_b32 s9, s7, 0x1ff +; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 +; SI-GISEL-NEXT: s_addk_i32 s5, 0xfc10 +; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe +; SI-GISEL-NEXT: s_or_b32 s6, s9, s6 +; SI-GISEL-NEXT: s_or_b32 s3, s4, s3 +; SI-GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s4, s8, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9 +; SI-GISEL-NEXT: s_lshl_b32 s8, s5, 12 +; SI-GISEL-NEXT: s_sub_i32 s9, 1, s5 +; SI-GISEL-NEXT: s_or_b32 s10, s4, 0x1000 +; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00 +; SI-GISEL-NEXT: s_or_b32 s4, s4, s8 +; SI-GISEL-NEXT: s_max_i32 s8, s9, 0 +; SI-GISEL-NEXT: s_min_i32 s8, 
s8, 13 +; SI-GISEL-NEXT: s_lshr_b32 s9, s10, s8 +; SI-GISEL-NEXT: s_lshl_b32 s8, s9, s8 +; SI-GISEL-NEXT: s_cmp_lg_u32 s8, s10 +; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s8, s9, s8 +; SI-GISEL-NEXT: s_cmp_lt_i32 s5, 1 +; SI-GISEL-NEXT: s_cselect_b32 s4, s8, s4 +; SI-GISEL-NEXT: s_and_b32 s8, s4, 7 +; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; SI-GISEL-NEXT: s_cmp_eq_u32 s8, 3 +; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0 +; SI-GISEL-NEXT: s_cmp_gt_i32 s8, 5 +; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s8, s9, s8 +; SI-GISEL-NEXT: s_add_i32 s4, s4, s8 +; SI-GISEL-NEXT: s_cmp_gt_i32 s5, 30 +; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; SI-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f +; SI-GISEL-NEXT: s_cselect_b32 s4, s6, s4 +; SI-GISEL-NEXT: s_lshr_b32 s5, s7, 16 +; SI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff +; SI-GISEL-NEXT: s_and_b32 s5, s5, 0x8000 +; SI-GISEL-NEXT: s_or_b32 s4, s5, s4 +; SI-GISEL-NEXT: s_and_b32 s4, s4, 0xffff +; SI-GISEL-NEXT: s_lshl_b32 s4, s4, 16 +; SI-GISEL-NEXT: s_or_b32 s4, s3, s4 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -1763,14 +2090,96 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 8 +; VI-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff +; VI-GISEL-NEXT: s_addk_i32 s2, 0xfc10 +; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffe +; VI-GISEL-NEXT: s_or_b32 s4, s8, s4 +; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s3, s3, s4 +; VI-GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; VI-GISEL-NEXT: s_sub_i32 s9, 1, s2 +; VI-GISEL-NEXT: s_lshl_b32 s8, s2, 12 +; 
VI-GISEL-NEXT: s_max_i32 s9, s9, 0 +; VI-GISEL-NEXT: s_or_b32 s8, s3, s8 +; VI-GISEL-NEXT: s_min_i32 s9, s9, 13 +; VI-GISEL-NEXT: s_bitset1_b32 s3, 12 +; VI-GISEL-NEXT: s_lshl_b32 s4, s4, 9 +; VI-GISEL-NEXT: s_lshr_b32 s10, s3, s9 +; VI-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00 +; VI-GISEL-NEXT: s_lshl_b32 s9, s10, s9 +; VI-GISEL-NEXT: s_cmp_lg_u32 s9, s3 +; VI-GISEL-NEXT: s_cselect_b32 s3, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s3, s10, s3 +; VI-GISEL-NEXT: s_cmp_lt_i32 s2, 1 +; VI-GISEL-NEXT: s_cselect_b32 s3, s3, s8 +; VI-GISEL-NEXT: s_and_b32 s8, s3, 7 +; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 2 +; VI-GISEL-NEXT: s_cmp_eq_u32 s8, 3 +; VI-GISEL-NEXT: s_cselect_b32 s9, 1, 0 +; VI-GISEL-NEXT: s_cmp_gt_i32 s8, 5 +; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s8, s9, s8 +; VI-GISEL-NEXT: s_add_i32 s3, s3, s8 +; VI-GISEL-NEXT: s_cmp_gt_i32 s2, 30 +; VI-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; VI-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f +; VI-GISEL-NEXT: s_cselect_b32 s2, s4, s3 +; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 16 +; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; VI-GISEL-NEXT: s_or_b32 s2, s3, s2 +; VI-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014 +; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 8 +; VI-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff +; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 +; VI-GISEL-NEXT: s_and_b32 s4, s4, 0xffe +; VI-GISEL-NEXT: s_or_b32 s5, s5, s6 +; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0 +; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s4, s4, s5 +; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; VI-GISEL-NEXT: s_sub_i32 s8, 1, s3 +; VI-GISEL-NEXT: s_lshl_b32 s6, s3, 12 +; VI-GISEL-NEXT: s_max_i32 s8, s8, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s4, s6 +; VI-GISEL-NEXT: s_min_i32 s8, s8, 13 +; VI-GISEL-NEXT: s_bitset1_b32 s4, 12 +; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; VI-GISEL-NEXT: s_lshr_b32 s9, s4, s8 +; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; VI-GISEL-NEXT: s_lshl_b32 s8, s9, s8 +; VI-GISEL-NEXT: s_cmp_lg_u32 s8, s4 +; 
VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s4, s9, s4 +; VI-GISEL-NEXT: s_cmp_lt_i32 s3, 1 +; VI-GISEL-NEXT: s_cselect_b32 s4, s4, s6 +; VI-GISEL-NEXT: s_and_b32 s6, s4, 7 +; VI-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s8, s6 +; VI-GISEL-NEXT: s_add_i32 s4, s4, s6 +; VI-GISEL-NEXT: s_cmp_gt_i32 s3, 30 +; VI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; VI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f +; VI-GISEL-NEXT: s_cselect_b32 s3, s5, s4 +; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 16 +; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 +; VI-GISEL-NEXT: s_or_b32 s3, s4, s3 +; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff +; VI-GISEL-NEXT: s_and_b32 s2, s2, 0xffff +; VI-GISEL-NEXT: s_lshl_b32 s3, s3, 16 +; VI-GISEL-NEXT: s_or_b32 s2, s2, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; VI-GISEL-NEXT: s_mov_b32 s2, -1 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] -; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-GISEL-NEXT: s_endpgm ; @@ -1881,14 +2290,93 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 8 +; GFX9-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff +; GFX9-GISEL-NEXT: s_addk_i32 s2, 0xfc10 +; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0xffe +; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; 
GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s3, s3, s4 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GFX9-GISEL-NEXT: s_sub_i32 s9, 1, s2 +; GFX9-GISEL-NEXT: s_lshl_b32 s8, s2, 12 +; GFX9-GISEL-NEXT: s_max_i32 s9, s9, 0 +; GFX9-GISEL-NEXT: s_or_b32 s8, s3, s8 +; GFX9-GISEL-NEXT: s_min_i32 s9, s9, 13 +; GFX9-GISEL-NEXT: s_bitset1_b32 s3, 12 +; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 9 +; GFX9-GISEL-NEXT: s_lshr_b32 s10, s3, s9 +; GFX9-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00 +; GFX9-GISEL-NEXT: s_lshl_b32 s9, s10, s9 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s9, s3 +; GFX9-GISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s3, s10, s3 +; GFX9-GISEL-NEXT: s_cmp_lt_i32 s2, 1 +; GFX9-GISEL-NEXT: s_cselect_b32 s3, s3, s8 +; GFX9-GISEL-NEXT: s_and_b32 s8, s3, 7 +; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 2 +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s8, 3 +; GFX9-GISEL-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s8, 5 +; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s8, s9, s8 +; GFX9-GISEL-NEXT: s_add_i32 s3, s3, s8 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s2, 30 +; GFX9-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX9-GISEL-NEXT: s_cselect_b32 s2, s4, s3 +; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 16 +; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX9-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014 +; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 8 +; GFX9-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff +; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10 +; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0xffe +; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s4, s4, s5 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s3 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s3, 12 +; GFX9-GISEL-NEXT: s_max_i32 s8, s8, 0 
+; GFX9-GISEL-NEXT: s_or_b32 s6, s4, s6 +; GFX9-GISEL-NEXT: s_min_i32 s8, s8, 13 +; GFX9-GISEL-NEXT: s_bitset1_b32 s4, 12 +; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX9-GISEL-NEXT: s_lshr_b32 s9, s4, s8 +; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX9-GISEL-NEXT: s_lshl_b32 s8, s9, s8 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s8, s4 +; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s4, s9, s4 +; GFX9-GISEL-NEXT: s_cmp_lt_i32 s3, 1 +; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s6 +; GFX9-GISEL-NEXT: s_and_b32 s6, s4, 7 +; GFX9-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6 +; GFX9-GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s3, 30 +; GFX9-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f +; GFX9-GISEL-NEXT: s_cselect_b32 s3, s5, s4 +; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 16 +; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX9-GISEL-NEXT: s_or_b32 s3, s4, s3 +; GFX9-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm ; @@ -1999,14 +2487,93 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; GFX950-GISEL-NEXT: 
s_lshr_b32 s3, s5, 8 +; GFX950-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff +; GFX950-GISEL-NEXT: s_addk_i32 s2, 0xfc10 +; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffe +; GFX950-GISEL-NEXT: s_or_b32 s4, s8, s4 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s3, s3, s4 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GFX950-GISEL-NEXT: s_sub_i32 s9, 1, s2 +; GFX950-GISEL-NEXT: s_lshl_b32 s8, s2, 12 +; GFX950-GISEL-NEXT: s_max_i32 s9, s9, 0 +; GFX950-GISEL-NEXT: s_or_b32 s8, s3, s8 +; GFX950-GISEL-NEXT: s_min_i32 s9, s9, 13 +; GFX950-GISEL-NEXT: s_bitset1_b32 s3, 12 +; GFX950-GISEL-NEXT: s_lshl_b32 s4, s4, 9 +; GFX950-GISEL-NEXT: s_lshr_b32 s10, s3, s9 +; GFX950-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00 +; GFX950-GISEL-NEXT: s_lshl_b32 s9, s10, s9 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s9, s3 +; GFX950-GISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s3, s10, s3 +; GFX950-GISEL-NEXT: s_cmp_lt_i32 s2, 1 +; GFX950-GISEL-NEXT: s_cselect_b32 s3, s3, s8 +; GFX950-GISEL-NEXT: s_and_b32 s8, s3, 7 +; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 2 +; GFX950-GISEL-NEXT: s_cmp_eq_u32 s8, 3 +; GFX950-GISEL-NEXT: s_cselect_b32 s9, 1, 0 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s8, 5 +; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s8, s9, s8 +; GFX950-GISEL-NEXT: s_add_i32 s3, s3, s8 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s2, 30 +; GFX950-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX950-GISEL-NEXT: s_cselect_b32 s2, s4, s3 +; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 16 +; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX950-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014 +; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 8 +; GFX950-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff +; GFX950-GISEL-NEXT: s_addk_i32 s3, 0xfc10 +; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffe +; GFX950-GISEL-NEXT: s_or_b32 s5, s5, s6 +; 
GFX950-GISEL-NEXT: s_cmp_lg_u32 s5, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s4, s4, s5 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX950-GISEL-NEXT: s_sub_i32 s8, 1, s3 +; GFX950-GISEL-NEXT: s_lshl_b32 s6, s3, 12 +; GFX950-GISEL-NEXT: s_max_i32 s8, s8, 0 +; GFX950-GISEL-NEXT: s_or_b32 s6, s4, s6 +; GFX950-GISEL-NEXT: s_min_i32 s8, s8, 13 +; GFX950-GISEL-NEXT: s_bitset1_b32 s4, 12 +; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX950-GISEL-NEXT: s_lshr_b32 s9, s4, s8 +; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX950-GISEL-NEXT: s_lshl_b32 s8, s9, s8 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s8, s4 +; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s4, s9, s4 +; GFX950-GISEL-NEXT: s_cmp_lt_i32 s3, 1 +; GFX950-GISEL-NEXT: s_cselect_b32 s4, s4, s6 +; GFX950-GISEL-NEXT: s_and_b32 s6, s4, 7 +; GFX950-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s6, s8, s6 +; GFX950-GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s3, 30 +; GFX950-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f +; GFX950-GISEL-NEXT: s_cselect_b32 s3, s5, s4 +; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 16 +; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX950-GISEL-NEXT: s_or_b32 s3, s4, s3 +; GFX950-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 ; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] -; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX950-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 ; 
GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX950-GISEL-NEXT: s_endpgm ; @@ -2247,16 +2814,101 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 -; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1 -; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 -; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff +; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4 +; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s2, 0xfc10 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s8, 1, s2 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000 +; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s8, s8, 0 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s9, s2, 12 +; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s8, s8, 13 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s4, s4, 9 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s11, s10, s8 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s9 +; GFX11-GISEL-TRUE16-NEXT: 
s_lshl_b32 s8, s11, s8 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s4, 0x7c00 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s8, s10 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s11, s8 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s2, 1 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s3, 7 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s9, s8 +; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s8 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s2, 30 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff +; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6 +; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4 +; 
GFX11-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000 +; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s4, 12 +; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s10, s9, s6 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s8 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s10, s6 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s9 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s10, s6 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s6, s3 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 7 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6 +; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s6 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s5, s3 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s4, s7, 16 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s4, s3 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2 +; 
GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-TRUE16-NEXT: s_endpgm ; @@ -2265,16 +2917,101 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 -; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 -; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff +; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4 +; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s2, 0xfc10 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s8, 1, s2 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000 +; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s8, s8, 0 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s9, s2, 12 +; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s8, s8, 13 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s4, s4, 9 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s11, s10, s8 +; GFX11-GISEL-FAKE16-NEXT: 
s_or_b32 s3, s3, s9 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s11, s8 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s4, 0x7c00 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s8, s10 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s11, s8 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s2, 1 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s3, 7 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s9, s8 +; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s8 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s2, 30 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff +; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6 +; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; 
GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000 +; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s4, 12 +; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s10, s9, s6 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s8 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s10, s6 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s9 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s10, s6 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s6, s3 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 7 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6 +; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s6 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s5, s3 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s4, s7, 16 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s4, s3 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; 
GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll index 2bd36591108a..4f8eab1c2fec 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll @@ -3,17 +3,15 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 
-global-isel=0 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) { ; SI-LABEL: fptrunc_f64_to_f32: @@ -94,6 +92,85 @@ define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) ret void } +define amdgpu_kernel void @fptrunc_f64_to_f32_afn(ptr addrspace(1) %out, double %in) { +; SI-LABEL: fptrunc_f64_to_f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_f64_to_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s6, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, 
s[2:3] +; VI-SDAG-NEXT: s_mov_b32 s4, s0 +; VI-SDAG-NEXT: s_mov_b32 s5, s1 +; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_f64_to_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_f64_to_f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_f64_to_f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_f64_to_f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_f64_to_f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; 
GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn double %in to float + store float %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) { ; SI-LABEL: fptrunc_f64_to_f16: ; SI: ; %bb.0: @@ -203,56 +280,56 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-SAFE-SDAG-NEXT: s_endpgm ; -; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; VI-SAFE-GISEL: ; %bb.0: -; VI-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; VI-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 -; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff -; VI-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 -; VI-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 -; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 -; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4 -; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12 -; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6 -; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13 -; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12 -; VI-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7 -; VI-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 -; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7 -; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s8, s2 -; VI-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s2, s6 -; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 -; VI-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 -; VI-SAFE-GISEL-NEXT: 
s_cselect_b32 s7, 1, 0 -; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 -; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 -; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 -; VI-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 -; VI-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; VI-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; VI-SAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; VI-SAFE-GISEL-NEXT: s_endpgm +; VI-GISEL-LABEL: fptrunc_f64_to_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; VI-GISEL-NEXT: s_or_b32 s2, s6, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; VI-GISEL-NEXT: s_max_i32 s7, s7, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s2, s6 +; VI-GISEL-NEXT: s_min_i32 s7, s7, 13 +; VI-GISEL-NEXT: s_bitset1_b32 s2, 12 +; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s8, s2 +; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; VI-GISEL-NEXT: s_and_b32 s6, s2, 7 +; VI-GISEL-NEXT: s_lshr_b32 s2, 
s2, 2 +; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s7, s6 +; VI-GISEL-NEXT: s_add_i32 s2, s2, s6 +; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; VI-GISEL-NEXT: s_or_b32 s2, s3, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm ; ; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; VI-UNSAFE-SDAG: ; %bb.0: @@ -265,17 +342,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-UNSAFE-SDAG-NEXT: s_endpgm ; -; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; VI-UNSAFE-GISEL: ; %bb.0: -; VI-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; VI-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; VI-UNSAFE-GISEL-NEXT: s_endpgm -; ; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; GFX10-SAFE-SDAG: ; %bb.0: ; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 @@ -328,56 +394,56 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX10-SAFE-SDAG-NEXT: s_endpgm ; -; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; GFX10-SAFE-GISEL: ; %bb.0: -; GFX10-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff -; GFX10-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 -; GFX10-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 -; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 -; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 -; GFX10-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 -; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 -; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 -; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s3, 
0x31016000 -; GFX10-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; GFX10-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; GFX10-SAFE-GISEL-NEXT: s_endpgm +; GFX10-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX10-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX10-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX10-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX10-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX10-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-GISEL-NEXT: s_sub_i32 s6, 1, s4 +; GFX10-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX10-GISEL-NEXT: s_max_i32 s6, s6, 0 +; GFX10-GISEL-NEXT: s_lshl_b32 s7, s4, 12 +; GFX10-GISEL-NEXT: s_min_i32 s6, s6, 13 +; GFX10-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX10-GISEL-NEXT: s_lshr_b32 s9, s8, s6 +; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s7 +; GFX10-GISEL-NEXT: s_lshl_b32 s6, s9, s6 +; GFX10-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s6, s8 +; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-GISEL-NEXT: s_or_b32 s6, s9, s6 +; GFX10-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX10-GISEL-NEXT: s_cselect_b32 s2, s6, s2 +; GFX10-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX10-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX10-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX10-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX10-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX10-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; GFX10-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX10-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX10-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; 
GFX10-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX10-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX10-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm ; ; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; GFX10-UNSAFE-SDAG: ; %bb.0: @@ -390,17 +456,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX10-UNSAFE-SDAG-NEXT: s_endpgm ; -; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; GFX10-UNSAFE-GISEL: ; %bb.0: -; GFX10-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 -; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX10-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; GFX10-UNSAFE-GISEL-NEXT: s_endpgm -; ; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; GFX11-SAFE-SDAG: ; %bb.0: ; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -461,60 +516,60 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-SAFE-SDAG-NEXT: s_endpgm ; -; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; GFX11-SAFE-GISEL: ; %bb.0: -; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff -; GFX11-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 -; GFX11-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; 
GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 -; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 -; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 -; GFX11-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 -; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 -; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 -; 
GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 -; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 -; GFX11-SAFE-GISEL-NEXT: s_endpgm +; GFX11-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-NEXT: s_lshl_b32 s7, s4, 12 +; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s7 +; GFX11-GISEL-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s8 +; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6 +; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2 +; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0 
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm ; ; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16: ; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0: @@ -539,8 +594,314 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm + %result = fptrunc double %in to half + %result_i16 = bitcast half %result to i16 + store i16 %result_i16, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptrunc_f64_to_f16_afn(ptr addrspace(1) %out, double %in) { +; SI-LABEL: fptrunc_f64_to_f16_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_movk_i32 s2, 0x7e00 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_lshr_b32 s0, s7, 8 +; SI-NEXT: s_and_b32 s1, s7, 0x1ff +; SI-NEXT: s_and_b32 s8, s0, 0xffe +; SI-NEXT: s_or_b32 s0, s1, s6 +; SI-NEXT: s_cmp_lg_u32 s0, 0 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-NEXT: s_bfe_u32 s0, s7, 0xb0014 +; SI-NEXT: v_readfirstlane_b32 s1, v0 +; SI-NEXT: s_sub_i32 s6, 0x3f1, s0 +; SI-NEXT: s_or_b32 s1, s8, s1 +; 
SI-NEXT: v_med3_i32 v0, s6, 0, 13 +; SI-NEXT: s_or_b32 s6, s1, 0x1000 +; SI-NEXT: v_readfirstlane_b32 s8, v0 +; SI-NEXT: s_lshr_b32 s9, s6, s8 +; SI-NEXT: s_lshl_b32 s8, s9, s8 +; SI-NEXT: s_cmp_lg_u32 s8, s6 +; SI-NEXT: s_cselect_b32 s6, 1, 0 +; SI-NEXT: s_addk_i32 s0, 0xfc10 +; SI-NEXT: s_or_b32 s6, s9, s6 +; SI-NEXT: s_lshl_b32 s8, s0, 12 +; SI-NEXT: s_or_b32 s8, s1, s8 +; SI-NEXT: s_cmp_lt_i32 s0, 1 +; SI-NEXT: s_cselect_b32 s6, s6, s8 +; SI-NEXT: s_and_b32 s8, s6, 7 +; SI-NEXT: s_cmp_gt_i32 s8, 5 +; SI-NEXT: s_cselect_b32 s9, 1, 0 +; SI-NEXT: s_cmp_eq_u32 s8, 3 +; SI-NEXT: s_cselect_b32 s8, 1, 0 +; SI-NEXT: s_lshr_b32 s6, s6, 2 +; SI-NEXT: s_or_b32 s8, s8, s9 +; SI-NEXT: s_add_i32 s6, s6, s8 +; SI-NEXT: s_cmp_lt_i32 s0, 31 +; SI-NEXT: s_cselect_b32 s6, s6, 0x7c00 +; SI-NEXT: s_cmp_lg_u32 s1, 0 +; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00 +; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f +; SI-NEXT: s_cselect_b32 s0, s1, s6 +; SI-NEXT: s_lshr_b32 s1, s7, 16 +; SI-NEXT: s_and_b32 s1, s1, 0x8000 +; SI-NEXT: s_or_b32 s6, s1, s0 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: v_mov_b32_e32 v0, s6 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm ; -; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16: +; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; VI-SAFE-SDAG: ; %bb.0: +; VI-SAFE-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 +; VI-SAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SAFE-SDAG-NEXT: s_mov_b32 s0, s4 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 8 +; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s4, 0xffe +; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s7, 0x1ff +; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s6 +; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; VI-SAFE-SDAG-NEXT: s_mov_b32 s1, s5 +; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; 
VI-SAFE-SDAG-NEXT: s_bfe_u32 s6, s7, 0xb0014 +; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s8, s4 +; VI-SAFE-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s6 +; VI-SAFE-SDAG-NEXT: v_med3_i32 v0, s8, 0, 13 +; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 +; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s8, v0 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s9, s5, s8 +; VI-SAFE-SDAG-NEXT: s_lshl_b32 s8, s9, s8 +; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s8, s5 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0 +; VI-SAFE-SDAG-NEXT: s_addk_i32 s6, 0xfc10 +; VI-SAFE-SDAG-NEXT: s_lshl_b32 s8, s6, 12 +; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s9, s5 +; VI-SAFE-SDAG-NEXT: s_or_b32 s8, s4, s8 +; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 1 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s8 +; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s5, 7 +; VI-SAFE-SDAG-NEXT: s_cmp_gt_i32 s8, 5 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; VI-SAFE-SDAG-NEXT: s_cmp_eq_u32 s8, 3 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; VI-SAFE-SDAG-NEXT: s_or_b32 s8, s8, s9 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 +; VI-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s8 +; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 31 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; VI-SAFE-SDAG-NEXT: s_movk_i32 s4, 0x7e00 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00 +; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s6, 0x40f +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s7, 16 +; VI-SAFE-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s5, s4 +; VI-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-SAFE-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_f64_to_f16_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-GISEL-NEXT: 
buffer_store_short v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; VI-UNSAFE-SDAG: ; %bb.0: +; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-UNSAFE-SDAG-NEXT: s_endpgm +; +; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; GFX10-SAFE-SDAG: ; %bb.0: +; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2 +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe +; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX10-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014 +; GFX10-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2 +; GFX10-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13 +; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 +; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s7, s5, s6 +; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s6, s7, s6 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, s5 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s7, s5 +; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s6, s2, 12 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s4, s6 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s6 +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7 +; GFX10-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5 +; GFX10-SAFE-SDAG-NEXT: 
s_cselect_b32 s7, 1, 0 +; GFX10-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7 +; GFX10-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31 +; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00 +; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5 +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16 +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2 +; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-SAFE-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_f64_to_f16_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; GFX10-UNSAFE-SDAG: ; %bb.0: +; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-UNSAFE-SDAG-NEXT: s_endpgm +; +; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; GFX11-SAFE-SDAG: ; %bb.0: 
+; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2 +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe +; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX11-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014 +; GFX11-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13 +; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s7, s5, s6 +; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s6, s7, s6 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, s5 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s7, s5 +; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s6, s2, 12 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s4, s6 +; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s6 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7 +; GFX11-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; 
GFX11-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6 +; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31 +; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00 +; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5 +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2 +; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-SAFE-SDAG-NEXT: s_endpgm +; +; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16_afn: +; GFX11-SAFE-GISEL: ; %bb.0: +; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-SAFE-GISEL-NEXT: s_endpgm +; +; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn: +; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0: +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 +; 
GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s2, -1 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_endpgm +; +; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn: +; GFX11-UNSAFE-DAG-FAKE16: ; %bb.0: +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm +; +; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-UNSAFE-GISEL-TRUE16: ; %bb.0: ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) @@ -552,7 +913,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_endpgm ; -; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16: +; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-UNSAFE-GISEL-FAKE16: ; %bb.0: ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) @@ -563,7 +924,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_endpgm - %result = fptrunc double %in to half + %result = fptrunc afn double %in to half %result_i16 = bitcast 
half %result to i16 store i16 %result_i16, ptr addrspace(1) %out ret void @@ -662,6 +1023,99 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x do ret void } +define amdgpu_kernel void @fptrunc_v2f64_to_v2f32_afn(ptr addrspace(1) %out, <2 x double> %in) { +; SI-LABEL: fptrunc_v2f64_to_v2f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s6, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; VI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s6, -1 +; VI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; VI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 
0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <2 x double> %in to <2 x float> + store <2 x float> %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr 
addrspace(1) %out, <3 x double> %in) { ; SI-LABEL: fptrunc_v3f64_to_v3f32: ; SI: ; %bb.0: @@ -769,6 +1223,113 @@ define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x do ret void } +define amdgpu_kernel void @fptrunc_v3f64_to_v3f32_afn(ptr addrspace(1) %out, <3 x double> %in) { +; SI-LABEL: fptrunc_v3f64_to_v3f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x11 +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x15 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; SI-NEXT: v_cvt_f32_f64_e32 v2, s[4:5] +; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54 +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44 +; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s6, -1 +; VI-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: 
fptrunc_v3f64_to_v3f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54 +; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x54 +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX11-GISEL: ; 
%bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <3 x double> %in to <3 x float> + store <3 x float> %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) { ; SI-LABEL: fptrunc_v4f64_to_v4f32: ; SI: ; %bb.0: @@ -876,6 +1437,113 @@ define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x do ret void } +define amdgpu_kernel void @fptrunc_v4f64_to_v4f32_afn(ptr addrspace(1) %out, <4 x double> %in) { +; SI-LABEL: fptrunc_v4f64_to_v4f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x11 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-SDAG-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn: +; 
GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x44 +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <4 x double> %in to <4 x float> + store <4 x float> %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) { ; SI-LABEL: fptrunc_v8f64_to_v8f32: ; SI: ; %bb.0: @@ -1019,3 +1687,150 @@ define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x do store <8 x float> %result, ptr addrspace(1) %out ret void } + +define amdgpu_kernel void @fptrunc_v8f64_to_v8f32_afn(ptr addrspace(1) %out, <8 x double> %in) { +; SI-LABEL: fptrunc_v8f64_to_v8f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x19 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) 
+; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; SI-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; SI-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; SI-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; SI-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] 
+; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; 
GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0x64 +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16 +; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0x64 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 +; GFX11-GISEL-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <8 x double> %in to <8 x float> + store <8 x float>
%result, ptr addrspace(1) %out + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10-SAFE-GISEL: {{.*}} +; VI-SAFE-GISEL: {{.*}}