From efd20a3603a03d65baa7b2c7c52dfac097026032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20Brku=C5=A1anin?= Date: Thu, 12 Mar 2026 12:32:56 +0100 Subject: [PATCH] [AMDGPU] Codegen for min/max instructions for gfx1170 (#185625) gfx1170 does not have s_minimum/maximum_f16/f32 instructions so a new feature `SALUMinimumMaximumInsts` is added for gfx12+ subtargets. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 18 +- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 11 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 8 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 + .../GlobalISel/clamp-fmed3-const-combine.ll | 59 + .../GlobalISel/clamp-minmax-const-combine.ll | 107 ++ .../GlobalISel/fmed3-min-max-const-combine.ll | 333 +++-- .../GlobalISel/llvm.amdgcn.rsq.clamp.ll | 89 ++ llvm/test/CodeGen/AMDGPU/fmax3.ll | 191 +++ llvm/test/CodeGen/AMDGPU/fmaximum.ll | 355 +++++ llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 811 +++++++++++ llvm/test/CodeGen/AMDGPU/fmin3.ll | 257 ++++ llvm/test/CodeGen/AMDGPU/fminimum.ll | 355 +++++ llvm/test/CodeGen/AMDGPU/fminimum3.ll | 811 +++++++++++ llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll | 235 +++- llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 209 ++- llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 261 +++- llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll | 236 +++- llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 209 ++- llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 261 +++- llvm/test/CodeGen/AMDGPU/maximumnum.ll | 1240 +++++++++++++++++ llvm/test/CodeGen/AMDGPU/minimummaximum.ll | 362 +++-- llvm/test/CodeGen/AMDGPU/minimumnum.ll | 1204 ++++++++++++++++ llvm/test/CodeGen/AMDGPU/minmax.ll | 363 +++++ .../AMDGPU/select-flags-to-fmin-fmax.ll | 626 +++++++++ .../test/CodeGen/AMDGPU/vector-reduce-fmax.ll | 678 +++++++++ .../CodeGen/AMDGPU/vector-reduce-fmaximum.ll | 224 +++ .../test/CodeGen/AMDGPU/vector-reduce-fmin.ll | 678 +++++++++ .../CodeGen/AMDGPU/vector-reduce-fminimum.ll | 464 ++++++ 30 files changed, 10350 insertions(+), 315 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bef02486f615..616effeb5b9f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -186,6 +186,10 @@ defm IEEEMinimumMaximumInsts : AMDGPUSubtargetFeature<"ieee-minimum-maximum-inst "v_pk_minimum/maximum_f16 instructions" >; +defm SALUMinimumMaximumInsts : AMDGPUSubtargetFeature<"salu-minimum-maximum-insts", + "Has s_minimum/maximum_f16/f32 instructions" +>; + defm Minimum3Maximum3F32 : AMDGPUSubtargetFeature<"minimum3-maximum3-f32", "Has v_minimum3_f32 and v_maximum3_f32 instructions" >; @@ -1488,9 +1492,10 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, - FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32, - FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureFlatOffsetBits24, FeatureFlatSignedOffset, FeatureInstCacheLineSize128 + FeatureIEEEMinimumMaximumInsts, FeatureSALUMinimumMaximumInsts, + FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, FeatureFlatOffsetBits24, + FeatureFlatSignedOffset, FeatureInstCacheLineSize128 ] >; @@ -1513,9 +1518,10 @@ def FeatureGFX13 : GCNSubtargetFeatureGeneration<"GFX13", FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, - FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32, - FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureFlatOffsetBits24, FeatureFlatSignedOffset, FeatureInstCacheLineSize128 + FeatureIEEEMinimumMaximumInsts, FeatureSALUMinimumMaximumInsts, + FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, FeatureFlatOffsetBits24, + FeatureFlatSignedOffset, FeatureInstCacheLineSize128 ] >; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index fb1680a97cdb..acb9bfb703e2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1351,10 +1351,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}) .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}); + bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts(); + addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard) - .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}) + .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts) + .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts) .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}}) - .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}) + .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts) + .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts) .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}) .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}}) .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}}) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 7e047278fe78..f14cc8e0446d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4075,8 +4075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_INTRINSIC_ROUNDEVEN: case AMDGPU::G_FMINNUM: case AMDGPU::G_FMAXNUM: - case AMDGPU::G_FMINIMUM: - case AMDGPU::G_FMAXIMUM: case AMDGPU::G_FMINIMUMNUM: case AMDGPU::G_FMAXIMUMNUM: case AMDGPU::G_INTRINSIC_TRUNC: @@ -4091,6 +4089,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getDefaultMappingSOP(MI); return getDefaultMappingVOP(MI); } + case AMDGPU::G_FMINIMUM: + case AMDGPU::G_FMAXIMUM: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = Ty.getSizeInBits(); + if (Subtarget.hasSALUMinimumMaximumInsts() && Ty.isScalar() && + (Size == 32 || Size == 16) && isSALUMapping(MI)) + return getDefaultMappingSOP(MI); + return getDefaultMappingVOP(MI); + } case AMDGPU::G_FPTOSI: case AMDGPU::G_FPTOUI: case AMDGPU::G_FPTOSI_SAT: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 80bc12762425..a55330c87e68 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2034,9 +2034,9 @@ class ClampPat : GCNPat < >; def : ClampPat; -let SubtargetPredicate = isNotGFX12Plus in +let SubtargetPredicate = NotHasIEEEMinimumMaximumInsts in def : ClampPat; -let SubtargetPredicate = isGFX12Plus in +let SubtargetPredicate = HasIEEEMinimumMaximumInsts in def : ClampPat; let SubtargetPredicate = NotHasTrue16BitInsts in def : ClampPat; @@ -3671,13 +3671,13 @@ multiclass SelectCanonicalizeAsMax< def : GCNPat< (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), (V_MAX_F64_e64 $src_mods, $src, $src_mods, $src)> { - let OtherPredicates = !listconcat(f64_preds, [isNotGFX12Plus]); + let OtherPredicates = !listconcat(f64_preds, [NotHasIEEEMinimumMaximumInsts]); } def : GCNPat< (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), (V_MAX_NUM_F64_e64 $src_mods, $src, $src_mods, $src)> { - let OtherPredicates = !listconcat(f64_preds, [isGFX12Plus]); + let OtherPredicates = !listconcat(f64_preds, [HasIEEEMinimumMaximumInsts]); } def : GCNPat< diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index df132a0eeeda..9160a42b7b37 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -641,6 +641,8 @@ unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { return SIEncodingFamily::GFX1250; if (ST.hasFeature(AMDGPU::FeatureGFX12Insts)) return SIEncodingFamily::GFX12; + if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts)) + return SIEncodingFamily::GFX1170; if (ST.hasFeature(AMDGPU::FeatureGFX11Insts)) return SIEncodingFamily::GFX11; llvm_unreachable("Subtarget generation does not support VOPD!"); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll index 23f5ae4f5900..884630ddbb47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 { @@ -9,6 +10,12 @@ define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 { ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_f32_known_nnan_ieee_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_f32_known_nnan_ieee_true: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -30,6 +37,12 @@ define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 { ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_f16_known_nnan_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_f16_known_nnan_ieee_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -53,6 +66,14 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 { ; GFX10-NEXT: v_min_f32_e64 v0, 0x41200000, v0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -77,6 +98,12 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -100,6 +127,12 @@ define float @test_fmed3_global_nnan(float %a) #3 { ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_global_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_global_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -127,6 +160,12 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 { ; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_f32_maybe_NaN_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_f32_maybe_NaN_ieee_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -151,6 +190,14 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 { ; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -175,6 +222,12 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -197,6 +250,12 @@ define float @test_fmed3_unknown_input_ieee_true_dx10clamp_true(float %a) #2 { ; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_fmed3_unknown_input_ieee_true_dx10clamp_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 0, 1.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_fmed3_unknown_input_ieee_true_dx10clamp_true: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll index 0315bd86feed..8fd41241b7ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s define float @test_min_max_ValK0_K1_f32(float %a) #0 { @@ -9,6 +10,12 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 { ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_ValK0_K1_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_ValK0_K1_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -31,6 +38,12 @@ define double @test_min_max_K0Val_K1_f64(double %a) #1 { ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_K0Val_K1_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_K0Val_K1_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -54,6 +67,12 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 { ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_K1max_ValK0_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_K1max_ValK0_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -76,6 +95,12 @@ define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 { ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_K1max_K0Val_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_K1max_K0Val_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -98,6 +123,12 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 { ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_splat_padded_with_undef: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_splat_padded_with_undef: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -122,6 +153,12 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 { ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_min_ValK1_K0_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_min_ValK1_K0_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -144,6 +181,12 @@ define double @test_max_min_K1Val_K0_f64(double %a) #1 { ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_min_K1Val_K0_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_min_K1Val_K0_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -166,6 +209,12 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 { ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_K0min_ValK1_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_K0min_ValK1_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -189,6 +238,12 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 { ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_K0min_K1Val_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_K0min_K1Val_v2f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -213,6 +268,12 @@ define float @test_min_max_global_nnan(float %a) { ; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_global_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_global_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -234,6 +295,12 @@ define float @test_max_min_global_nnan(float %a) { ; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_min_global_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_min_global_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -261,6 +328,12 @@ define float @test_min_max_K0_gt_K1(float %a) #0 { ; GFX10-NEXT: v_min_f32_e32 v0, 0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_K0_gt_K1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 1.0, 0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_K0_gt_K1: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -284,6 +357,12 @@ define float @test_max_min_K0_gt_K1(float %a) #0 { ; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_min_K0_gt_K1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0, 1.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_min_K0_gt_K1: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -310,6 +389,12 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -334,6 +419,12 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) # ; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -360,6 +451,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -386,6 +485,14 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll index b754bb6081c3..dbcd4eaa2d71 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll @@ -1,20 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s define float @test_min_max_ValK0_K1_f32(float %a) #0 { +; GFX8-LABEL: test_min_max_ValK0_K1_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_ValK0_K1_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_ValK0_K1_f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_ValK0_K1_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_ValK0_K1_f32: ; GFX12: ; %bb.0: @@ -31,17 +38,23 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 { } define float @test_min_max_K0Val_K1_f32(float %a) #1 { +; GFX8-LABEL: test_min_max_K0Val_K1_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_K0Val_K1_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_K0Val_K1_f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_K0Val_K1_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_K0Val_K1_f32: ; GFX12: ; %bb.0: @@ -60,13 +73,6 @@ define float @test_min_max_K0Val_K1_f32(float %a) #1 { ; min-max patterns for ieee=true do not have to check for NaNs ; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true define half @test_min_K1max_ValK0_f16(half %a) #0 { -; GFX10-LABEL: test_min_K1max_ValK0_f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; ; GFX8-LABEL: test_min_K1max_ValK0_f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -75,6 +81,21 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 { ; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; +; GFX10-LABEL: test_min_K1max_ValK0_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-LABEL: test_min_K1max_ValK0_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_K1max_ValK0_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -92,12 +113,6 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 { } define half @test_min_K1max_K0Val_f16(half %a) #1 { -; GFX10-LABEL: test_min_K1max_K0Val_f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; ; GFX8-LABEL: test_min_K1max_K0Val_f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -105,6 +120,18 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 { ; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; +; GFX10-LABEL: test_min_K1max_K0Val_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-LABEL: test_min_K1max_K0Val_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_K1max_K0Val_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -121,17 +148,23 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 { ; max-mix patterns work only for non-NaN inputs define float @test_max_min_ValK1_K0_f32(float %a) #0 { +; GFX8-LABEL: test_max_min_ValK1_K0_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_max_min_ValK1_K0_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_max_min_ValK1_K0_f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_max_min_ValK1_K0_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_ValK1_K0_f32: ; GFX12: ; %bb.0: @@ -148,17 +181,23 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 { } define float @test_max_min_K1Val_K0_f32(float %a) #1 { +; GFX8-LABEL: test_max_min_K1Val_K0_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_max_min_K1Val_K0_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_max_min_K1Val_K0_f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_max_min_K1Val_K0_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_K1Val_K0_f32: ; GFX12: ; %bb.0: @@ -175,12 +214,6 @@ define float @test_max_min_K1Val_K0_f32(float %a) #1 { } define half @test_max_K0min_ValK1_f16(half %a) #0 { -; GFX10-LABEL: test_max_K0min_ValK1_f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; ; GFX8-LABEL: test_max_K0min_ValK1_f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -188,6 +221,18 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 { ; GFX8-NEXT: v_max_f16_e32 v0, 2.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; +; GFX10-LABEL: test_max_K0min_ValK1_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-LABEL: test_max_K0min_ValK1_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_K0min_ValK1_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -203,12 +248,6 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 { } define half @test_max_K0min_K1Val_f16(half %a) #1 { -; GFX10-LABEL: test_max_K0min_K1Val_f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; ; GFX8-LABEL: test_max_K0min_K1Val_f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -216,6 +255,18 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 { ; GFX8-NEXT: v_max_f16_e32 v0, 2.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; +; GFX10-LABEL: test_max_K0min_K1Val_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-LABEL: test_max_K0min_K1Val_f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_max_K0min_K1Val_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -233,17 +284,23 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 { ; global nnan function attribute always forces fmed3 combine define float @test_min_max_global_nnan(float %a) { +; GFX8-LABEL: test_min_max_global_nnan: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_global_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_global_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_global_nnan: ; GFX12: ; %bb.0: @@ -260,17 +317,23 @@ define float @test_min_max_global_nnan(float %a) { } define float @test_max_min_global_nnan(float %a) { +; GFX8-LABEL: test_max_min_global_nnan: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_max_min_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_max_min_global_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_max_min_global_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_global_nnan: ; GFX12: ; %bb.0: @@ -292,6 +355,13 @@ define float @test_max_min_global_nnan(float %a) { ; min(max(Val, K0), K1) K0 > K1, should be K0<=K1 define float @test_min_max_K0_gt_K1(float %a) #0 { +; GFX8-LABEL: test_min_max_K0_gt_K1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0 +; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_K0_gt_K1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -299,12 +369,11 @@ define float @test_min_max_K0_gt_K1(float %a) #0 { ; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_K0_gt_K1: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_K0_gt_K1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 4.0, 2.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_K0_gt_K1: ; GFX12: ; %bb.0: @@ -322,6 +391,13 @@ define float @test_min_max_K0_gt_K1(float %a) #0 { ; max(min(Val, K1), K0) K0 > K1, should be K0<=K1 define float @test_max_min_K0_gt_K1(float %a) #0 { +; GFX8-LABEL: test_max_min_K0_gt_K1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_max_min_K0_gt_K1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -329,12 +405,11 @@ define float @test_max_min_K0_gt_K1(float %a) #0 { ; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_max_min_K0_gt_K1: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_max_min_K0_gt_K1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_K0_gt_K1: ; GFX12: ; %bb.0: @@ -352,6 +427,13 @@ define float @test_max_min_K0_gt_K1(float %a) #0 { ; non-inline constant define float @test_min_max_non_inline_const(float %a) #0 { +; GFX8-LABEL: test_min_max_non_inline_const: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: v_min_f32_e32 v0, 0x41000000, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_non_inline_const: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -359,12 +441,11 @@ define float @test_min_max_non_inline_const(float %a) #0 { ; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_non_inline_const: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, 0x41000000, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_non_inline_const: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 2.0, 0x41000000 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_non_inline_const: ; GFX12: ; %bb.0: @@ -383,6 +464,13 @@ define float @test_min_max_non_inline_const(float %a) #0 { ; there is no fmed3 for f64 or v2f16 types define double @test_min_max_f64(double %a) #0 { +; GFX8-LABEL: test_min_max_f64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 +; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -390,12 +478,13 @@ define double @test_min_max_f64(double %a) #0 { ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], 2.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_f64: ; GFX12: ; %bb.0: @@ -414,13 +503,6 @@ define double @test_min_max_f64(double %a) #0 { } define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 { -; GFX10-LABEL: test_min_max_v2f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0] -; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; ; GFX8-LABEL: test_min_max_v2f16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -433,6 +515,21 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 { ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; +; GFX10-LABEL: test_min_max_v2f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-LABEL: test_min_max_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_min_max_v2f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -453,6 +550,13 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 { ; min-max patterns for ieee=false require known non-NaN input define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { +; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -460,12 +564,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX12: ; %bb.0: @@ -486,6 +591,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; max-min patterns always require known non-NaN input define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { +; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -493,12 +605,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX12: ; %bb.0: @@ -518,6 +631,14 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { +; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -526,13 +647,13 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX12: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll index 4a6e24b70066..9bbdc2982138 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s define float @v_rsq_clamp_f32(float %src) #0 { @@ -18,6 +19,15 @@ define float @v_rsq_clamp_f32(float %src) #0 { ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f32_e32 v0, v0 +; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff +; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -49,6 +59,15 @@ define float @v_rsq_clamp_fabs_f32(float %src) #0 { ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_fabs_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f32_e64 v0, |v0| +; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff +; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_fabs_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -85,6 +104,19 @@ define double @v_rsq_clamp_f64(double %src) #0 { ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -124,6 +156,19 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 { ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_fabs_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]| +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_fabs_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -160,6 +205,15 @@ define float @v_rsq_clamp_undef_f32() #0 { ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_undef_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f32_e32 v0, s0 +; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff +; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_undef_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -196,6 +250,19 @@ define double @v_rsq_clamp_undef_f64() #0 { ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_undef_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], s[0:1] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_undef_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -231,6 +298,15 @@ define float @v_rsq_clamp_f32_non_ieee(float %src) #2 { ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_f32_non_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f32_e32 v0, v0 +; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff +; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_f32_non_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -266,6 +342,19 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 { ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; VI-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_rsq_clamp_f64_non_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: v_mov_b32_e32 v2, -1 +; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_rsq_clamp_f64_non_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll index c2b44cb251ff..03ab1e3e6921 100644 --- a/llvm/test/CodeGen/AMDGPU/fmax3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll @@ -4,6 +4,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-TRUE16 %s @@ -130,6 +132,36 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrs ; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0 ; GFX11-NEXT: s_endpgm ; +; GFX1170-LABEL: test_fmax3_olt_0_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-NEXT: s_mov_b32 s10, -1 +; GFX1170-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-NEXT: s_mov_b32 s14, s10 +; GFX1170-NEXT: s_mov_b32 s15, s11 +; GFX1170-NEXT: s_mov_b32 s18, s10 +; GFX1170-NEXT: s_mov_b32 s19, s11 +; GFX1170-NEXT: s_mov_b32 s22, s10 +; GFX1170-NEXT: s_mov_b32 s23, s11 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s2 +; GFX1170-NEXT: s_mov_b32 s13, s3 +; GFX1170-NEXT: s_mov_b32 s16, s4 +; GFX1170-NEXT: s_mov_b32 s17, s5 +; GFX1170-NEXT: s_mov_b32 s20, s6 +; GFX1170-NEXT: s_mov_b32 s21, s7 +; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s8, s0 +; GFX1170-NEXT: s_mov_b32 s9, s1 +; GFX1170-NEXT: v_max3_num_f32 v0, v0, v1, v2 +; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0 +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_fmax3_olt_0_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -321,6 +353,36 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrs ; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0 ; GFX11-NEXT: s_endpgm ; +; GFX1170-LABEL: test_fmax3_olt_1_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-NEXT: s_mov_b32 s10, -1 +; GFX1170-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-NEXT: s_mov_b32 s14, s10 +; GFX1170-NEXT: s_mov_b32 s15, s11 +; GFX1170-NEXT: s_mov_b32 s18, s10 +; GFX1170-NEXT: s_mov_b32 s19, s11 +; GFX1170-NEXT: s_mov_b32 s22, s10 +; GFX1170-NEXT: s_mov_b32 s23, s11 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s2 +; GFX1170-NEXT: s_mov_b32 s13, s3 +; GFX1170-NEXT: s_mov_b32 s16, s4 +; GFX1170-NEXT: s_mov_b32 s17, s5 +; GFX1170-NEXT: s_mov_b32 s20, s6 +; GFX1170-NEXT: s_mov_b32 s21, s7 +; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s8, s0 +; GFX1170-NEXT: s_mov_b32 s9, s1 +; GFX1170-NEXT: v_max3_num_f32 v0, v2, v0, v1 +; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0 +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_fmax3_olt_1_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -552,6 +614,66 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrs ; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-FAKE16-NEXT: s_endpgm ; +; GFX1170-TRUE16-LABEL: test_fmax3_olt_0_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v0.h, v1.l +; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-TRUE16-NEXT: s_endpgm +; +; GFX1170-FAKE16-LABEL: test_fmax3_olt_0_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v2 +; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-FAKE16-NEXT: s_endpgm +; ; GFX12-TRUE16-LABEL: test_fmax3_olt_0_f16: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -845,6 +967,66 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrs ; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-FAKE16-NEXT: s_endpgm ; +; GFX1170-TRUE16-LABEL: test_fmax3_olt_1_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-TRUE16-NEXT: v_max3_num_f16 v0.l, v1.l, v0.l, v0.h +; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-TRUE16-NEXT: s_endpgm +; +; GFX1170-FAKE16-LABEL: test_fmax3_olt_1_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-FAKE16-NEXT: v_max3_num_f16 v0, v2, v0, v1 +; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-FAKE16-NEXT: s_endpgm +; ; GFX12-TRUE16-LABEL: test_fmax3_olt_1_f16: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -1042,6 +1224,15 @@ define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; GFX11-NEXT: v_pk_max_f16 v0, v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: no_fmax3_v2f16: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v2, v0 +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: no_fmax3_v2f16: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll index 471829186841..ea1abbdf3b1c 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s @@ -15,6 +19,11 @@ define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_f32_vv: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f32_vv: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v1 @@ -44,6 +53,11 @@ define amdgpu_ps float @test_fmaximum_f32_ss(float inreg %a, float inreg %b) { ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_f32_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, s0, s1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f32_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_maximum_f32 s0, s0, s1 @@ -63,6 +77,11 @@ define amdgpu_ps float @test_fmaximum_f32_vs(float %a, float inreg %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_f32_vs: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, s0 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f32_vs: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, s0 @@ -77,6 +96,11 @@ define amdgpu_ps float @test_fmaximum_nnan_f32(float %a, float %b) { ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_nnan_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_nnan_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v1 @@ -94,6 +118,11 @@ define amdgpu_ps float @test_fmaximum_nsz_f32(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_nsz_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_nsz_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v1 @@ -108,6 +137,11 @@ define amdgpu_ps float @test_fmaximum_signed_zero_f32() { ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_signed_zero_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_signed_zero_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_mov_b32_e32 v0, 0 @@ -128,6 +162,12 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32(<2 x float> %a, <2 x float> %b ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v2 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v2f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v2 @@ -169,6 +209,12 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32_ss(<2 x float> inreg %a, <2 x ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v2f32_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, s0, s2 +; GFX1170-NEXT: v_maximum_f32 v1, s1, s3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v2f32_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_maximum_f32 s0, s0, s2 @@ -195,6 +241,13 @@ define amdgpu_ps <3 x float> @test_fmaximum_v3f32(<3 x float> %a, <3 x float> %b ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v3f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v3 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v4 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v5 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v3f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v3 @@ -223,6 +276,14 @@ define amdgpu_ps <4 x float> @test_fmaximum_v4f32(<4 x float> %a, <4 x float> %b ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v4f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v4 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v5 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v6 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v7 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v4f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v4 @@ -288,6 +349,26 @@ define amdgpu_ps <16 x float> @test_fmaximum_v16f32(<16 x float> %a, <16 x float ; GFX9-NEXT: v_cndmask_b32_e32 v15, v33, v16, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v16f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v16 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v17 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v18 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v19 +; GFX1170-NEXT: v_maximum_f32 v4, v4, v20 +; GFX1170-NEXT: v_maximum_f32 v5, v5, v21 +; GFX1170-NEXT: v_maximum_f32 v6, v6, v22 +; GFX1170-NEXT: v_maximum_f32 v7, v7, v23 +; GFX1170-NEXT: v_maximum_f32 v8, v8, v24 +; GFX1170-NEXT: v_maximum_f32 v9, v9, v25 +; GFX1170-NEXT: v_maximum_f32 v10, v10, v26 +; GFX1170-NEXT: v_maximum_f32 v11, v11, v27 +; GFX1170-NEXT: v_maximum_f32 v12, v12, v28 +; GFX1170-NEXT: v_maximum_f32 v13, v13, v29 +; GFX1170-NEXT: v_maximum_f32 v14, v14, v30 +; GFX1170-NEXT: v_maximum_f32 v15, v15, v31 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v16f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v16 @@ -320,6 +401,26 @@ define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-SDAG-FAKE16-LABEL: test_fmaximum_f16_vv: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_f16_vv: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_f16_vv: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l @@ -364,6 +465,26 @@ define amdgpu_ps half @test_fmaximum_f16_ss(half inreg %a, half inreg %b) { ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-TRUE16-LABEL: test_fmaximum_f16_ss: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1 +; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-SDAG-FAKE16-LABEL: test_fmaximum_f16_ss: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, s0, s1 +; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_f16_ss: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1 +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_f16_ss: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v0, s0, s1 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f16_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_maximum_f16 s0, s0, s1 @@ -399,6 +520,11 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_vv(<2 x half> %a, <2 x half> %b ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v2f16_vv: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v2f16_vv: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 @@ -448,6 +574,11 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x ha ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v2f16_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v2f16_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s1 @@ -490,6 +621,27 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fmaximum_v3f16_vv: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_vv: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, v1.l, v3.l +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_vv: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, v1, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fmaximum_v3f16_vv: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_pk_maximum_f16 v0, v0, v2 @@ -567,6 +719,30 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x ha ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fmaximum_v3f16_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v0, s0, s2 +; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v1, s1, s3 +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_ss: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, s1, s3 +; GFX1170-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, s0, s2 +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, s0 +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_ss: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, s1, s3 +; GFX1170-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, s0, s2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s0 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fmaximum_v3f16_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_pk_maximum_f16 v0, s0, s2 @@ -624,6 +800,12 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16(<4 x half> %a, <4 x half> %b) { ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v4f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v4f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 @@ -706,6 +888,12 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16_ss(<4 x half> inreg %a, <4 x ha ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v4f16_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, s1, s3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v4f16_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s2 @@ -734,6 +922,11 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_vv(double %a, double %b) { ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_f64_vv: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f64_vv: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] @@ -771,6 +964,21 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_ss(double inreg %a, double inreg ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fmaximum_f64_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-LABEL: test_fmaximum_f64_ss: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1170-GISEL-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fmaximum_f64_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] @@ -835,6 +1043,27 @@ define amdgpu_ps <4 x float> @test_fmaximum_v2f64_ss(<2 x double> inreg %a, <2 x ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fmaximum_v2f64_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5] +; GFX1170-SDAG-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7] +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-LABEL: test_fmaximum_v2f64_ss: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5] +; GFX1170-GISEL-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1170-GISEL-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fmaximum_v2f64_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5] @@ -904,6 +1133,14 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64(<4 x double> %a, <4 x double> ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v7, v18, v13, s[4:5] ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_v4f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_v4f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] @@ -992,6 +1229,36 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64_ss(<4 x double> inreg %a, <4 x ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, s5 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fmaximum_v4f64_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9] +; GFX1170-SDAG-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11] +; GFX1170-SDAG-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13] +; GFX1170-SDAG-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15] +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-LABEL: test_fmaximum_v4f64_ss: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9] +; GFX1170-GISEL-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11] +; GFX1170-GISEL-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13] +; GFX1170-GISEL-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX1170-GISEL-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fmaximum_v4f64_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9] @@ -1063,6 +1330,21 @@ define amdgpu_kernel void @fmaximumi_f32_move_to_valu(ptr addrspace(1) %out, ptr ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm ; +; GFX1170-LABEL: fmaximumi_f32_move_to_valu: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_clause 0x1 +; GFX1170-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v1, v1, v2 +; GFX1170-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1170-NEXT: s_endpgm +; ; GFX12-SDAG-LABEL: fmaximumi_f32_move_to_valu: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_clause 0x1 @@ -1143,6 +1425,69 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr ; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm ; +; GFX1170-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: s_clause 0x1 +; GFX1170-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-SDAG-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX1170-SDAG-TRUE16-NEXT: s_endpgm +; +; GFX1170-SDAG-FAKE16-LABEL: fmaximum_f16_move_to_valu: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: s_clause 0x1 +; GFX1170-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-SDAG-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v1, v1, v2 +; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX1170-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1170-GISEL-TRUE16-LABEL: fmaximum_f16_move_to_valu: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: s_clause 0x1 +; GFX1170-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-GISEL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX1170-GISEL-TRUE16-NEXT: s_endpgm +; +; GFX1170-GISEL-FAKE16-LABEL: fmaximum_f16_move_to_valu: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: s_clause 0x1 +; GFX1170-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s3, v2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, s2, s3 +; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX1170-GISEL-FAKE16-NEXT: s_endpgm +; ; GFX12-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1 @@ -1226,6 +1571,11 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_on(float %a, float %b) #0 { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_f32_ieee_on: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f32_ieee_on: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v1 @@ -1243,6 +1593,11 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_off(float %a, float %b) #1 { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fmaximum_f32_ieee_off: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fmaximum_f32_ieee_off: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maximum_f32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 6010f29c166a..bdbd980cc166 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s @@ -15,6 +17,12 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,6 +58,12 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v2, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -84,6 +98,14 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: s_fmaximum3_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_mov_b32_e32 v0, s2 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, s0, s1, v0 +; GFX1170-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_fmaximum3_f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -126,6 +148,12 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fabs0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fabs0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -162,6 +190,12 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fabs1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fabs1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,6 +232,12 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fabs2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, |v2| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fabs2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -234,6 +274,12 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -272,6 +318,12 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -310,6 +362,12 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fneg_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -351,6 +409,12 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fneg0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, -v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fneg0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -387,6 +451,12 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fneg1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, -v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fneg1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -423,6 +493,12 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_fneg2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, -v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_fneg2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -459,6 +535,12 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_const0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_const0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -495,6 +577,12 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32__const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32__const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -531,6 +619,12 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_inlineimm0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_inlineimm0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -566,6 +660,12 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32__inlineimm: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32__inlineimm: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -603,6 +703,14 @@ define float @v_fmaximum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f32_const1_const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s0, 0x41000000 +; GFX1170-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f32_const1_const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -641,6 +749,13 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v4, v0, v2 +; GFX1170-NEXT: v_maximum3_f32 v1, v5, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -686,6 +801,13 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f32_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v4 +; GFX1170-NEXT: v_maximum3_f32 v1, v1, v3, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f32_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -731,6 +853,13 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f32__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v4| +; GFX1170-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f32__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -779,6 +908,13 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f32__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, -v0, -v2, -v4 +; GFX1170-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f32__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -827,6 +963,13 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f32__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, 2.0, v2 +; GFX1170-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -872,6 +1015,13 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f32__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, 4.0 +; GFX1170-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -918,6 +1068,14 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v6, v0, v3 +; GFX1170-NEXT: v_maximum3_f32 v1, v7, v1, v4 +; GFX1170-NEXT: v_maximum3_f32 v2, v8, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -973,6 +1131,14 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f32_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v3, v6 +; GFX1170-NEXT: v_maximum3_f32 v1, v1, v4, v7 +; GFX1170-NEXT: v_maximum3_f32 v2, v2, v5, v8 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f32_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1028,6 +1194,14 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f32__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v6| +; GFX1170-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v7| +; GFX1170-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f32__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1086,6 +1260,14 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f32__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, -v0, -v3, -v6 +; GFX1170-NEXT: v_maximum3_f32 v1, -v1, -v4, -v7 +; GFX1170-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f32__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1144,6 +1326,14 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f32__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, 2.0, v3 +; GFX1170-NEXT: v_maximum3_f32 v1, v1, 2.0, v4 +; GFX1170-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1199,6 +1389,14 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f32__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v3, 4.0 +; GFX1170-NEXT: v_maximum3_f32 v1, v1, v4, 4.0 +; GFX1170-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1263,6 +1461,18 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1308,6 +1518,18 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v2, v0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_commute: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v2.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_commute: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v2, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1355,6 +1577,26 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg % ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: ; return to shader part epilog ; +; GFX1170-TRUE16-LABEL: s_fmaximum3_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, s0, s1, v0.l +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-FAKE16-LABEL: s_fmaximum3_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, s0, s1, v0 +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-FAKE16-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_fmaximum3_f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -1411,6 +1653,18 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, |v0.l|, v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fabs0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1458,6 +1712,18 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, |v1|, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs1: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, |v1.l|, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs1: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, |v1|, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fabs1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1505,6 +1771,18 @@ define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, |v2| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, |v2.l| +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, |v2| +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fabs2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1552,6 +1830,18 @@ define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, |v1|, |v2| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs_all: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, |v0.l|, |v1.l|, |v2.l| +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs_all: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, |v1|, |v2| +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1603,6 +1893,18 @@ define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, -v0, -v1, -v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg_all: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, -v0.l, -v1.l, -v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg_all: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, -v0, -v1, -v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1654,6 +1956,18 @@ define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, -|v0|, -|v1|, -|v2| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg_fabs_all: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, -|v0.l|, -|v1.l|, -|v2.l| +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg_fabs_all: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, -|v0|, -|v1|, -|v2| +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fneg_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1708,6 +2022,18 @@ define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, -v0, v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, -v0.l, v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, -v0, v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fneg0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1755,6 +2081,18 @@ define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, -v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg1: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, -v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg1: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, -v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fneg1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1802,6 +2140,18 @@ define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, -v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, -v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, -v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_fneg2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1849,6 +2199,18 @@ define half @v_fmaximum3_f16_const0(half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, 0x4800, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_const0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, 0x4800, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_const0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, 0x4800, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_const0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1895,6 +2257,18 @@ define half @v_fmaximum3_f16__const2(half %a, half %b) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 0x4800 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16__const2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 0x4800 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16__const2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 0x4800 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16__const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1941,6 +2315,18 @@ define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, 4.0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_inlineimm0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, 4.0, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_inlineimm0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, 4.0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_inlineimm0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1986,6 +2372,18 @@ define half @v_fmaximum3_f16__inlineimm(half %a, half %b) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 4.0 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16__inlineimm: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 4.0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16__inlineimm: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 4.0 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16__inlineimm: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2035,6 +2433,22 @@ define half @v_fmaximum3_f16_const1_const2(half %a) { ; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, s0, 0x4c00 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_const1_const2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x4800 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 0x4c00 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_const1_const2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: s_movk_i32 s0, 0x4800 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, s0, 0x4c00 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_f16_const1_const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2074,6 +2488,14 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c ; GFX12-NEXT: v_pk_maximum_f16 v0, v2, v0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v2, v0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2120,6 +2542,14 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f16_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2169,6 +2599,17 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f16__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 +; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f16__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2222,6 +2663,14 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f16__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f16__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2271,6 +2720,14 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) { ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f16__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2316,6 +2773,14 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) { ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v2f16__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2364,6 +2829,16 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c ; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v4, v0 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v5, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2426,6 +2901,16 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f16_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2495,6 +2980,23 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f16__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 +; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 +; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f16__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2571,6 +3073,16 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f16__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f16__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2636,6 +3148,16 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f16__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 2.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2693,6 +3215,16 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v3f16__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2755,6 +3287,16 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c ; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v4f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v4, v0 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v5, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v4f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2820,6 +3362,16 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v4f16_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v4f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2892,6 +3444,23 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v4f16__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 +; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 +; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v4f16__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2969,6 +3538,16 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v4f16__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v4f16__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3037,6 +3616,16 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v4f16__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3099,6 +3688,16 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_v4f16__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3162,6 +3761,14 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3195,6 +3802,14 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[4:5], v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[4:5], v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_commute: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3227,6 +3842,16 @@ define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, do ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: s_fmaximum3_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], s[4:5] +; GFX1170-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_fmaximum3_f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3] @@ -3269,6 +3894,14 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fabs0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], |v[0:1]|, v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fabs0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3303,6 +3936,14 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fabs1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[2:3]| +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fabs1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3337,6 +3978,14 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fabs2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fabs2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3371,6 +4020,14 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], |v[0:1]|, |v[2:3]| +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fabs_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3407,6 +4064,14 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], -v[0:1], -v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fneg_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3443,6 +4108,14 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -|v[4:5]| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fneg_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], -|v[0:1]|, -|v[2:3]| +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -|v[4:5]| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3482,6 +4155,14 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fneg0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], -v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fneg0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3516,6 +4197,14 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fneg1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fneg1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3550,6 +4239,14 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_fneg2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_fneg2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3584,6 +4281,14 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_const0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_const0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3619,6 +4324,14 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) { ; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64__const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64__const2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3653,6 +4366,14 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_inlineimm0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_inlineimm0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3686,6 +4407,14 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) { ; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64__inlineimm: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64__inlineimm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3719,6 +4448,14 @@ define double @v_fmaximum3_f64_const1_const2(double %a) { ; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40300000, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fmaximum3_f64_const1_const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40300000, v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fmaximum3_f64_const1_const2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3755,6 +4492,14 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_maximum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f32 v1, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_no_fmaximum3_f32__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3790,6 +4535,15 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_maximum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maximum_f32 v0, s0, s1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_maximum_f32 v1, v0, s2 +; GFX1170-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_no_fmaximum3_f32__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -3852,6 +4606,23 @@ define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_no_fmaximum3_f16__multi_use: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.h, v0.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_no_fmaximum3_f16__multi_use: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v1, v0, v2 +; GFX1170-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_no_fmaximum3_f16__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3893,6 +4664,30 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half in ; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-TRUE16-LABEL: s_no_fmaximum3_f16__multi_use: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v1.l, v0.l, s2 +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-FAKE16-LABEL: s_no_fmaximum3_f16__multi_use: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, s0, s1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v1, v0, s2 +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-FAKE16-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_no_fmaximum3_f16__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -3948,6 +4743,14 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, ; GFX12-NEXT: v_pk_maximum_f16 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_no_fmaximum3_v2f16__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_maximum_f16 v1, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_no_fmaximum3_v2f16__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3997,6 +4800,14 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double ; GFX12-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_no_fmaximum3_f64__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_no_fmaximum3_f64__multi_use: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll index 382c98218a11..4b952df206b3 100644 --- a/llvm/test/CodeGen/AMDGPU/fmin3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll @@ -4,6 +4,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-TRUE16 %s @@ -130,6 +132,36 @@ define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrs ; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0 ; GFX11-NEXT: s_endpgm ; +; GFX1170-LABEL: test_fmin3_olt_0_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-NEXT: s_mov_b32 s10, -1 +; GFX1170-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-NEXT: s_mov_b32 s14, s10 +; GFX1170-NEXT: s_mov_b32 s15, s11 +; GFX1170-NEXT: s_mov_b32 s18, s10 +; GFX1170-NEXT: s_mov_b32 s19, s11 +; GFX1170-NEXT: s_mov_b32 s22, s10 +; GFX1170-NEXT: s_mov_b32 s23, s11 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s2 +; GFX1170-NEXT: s_mov_b32 s13, s3 +; GFX1170-NEXT: s_mov_b32 s16, s4 +; GFX1170-NEXT: s_mov_b32 s17, s5 +; GFX1170-NEXT: s_mov_b32 s20, s6 +; GFX1170-NEXT: s_mov_b32 s21, s7 +; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s8, s0 +; GFX1170-NEXT: s_mov_b32 s9, s1 +; GFX1170-NEXT: v_min3_num_f32 v0, v0, v1, v2 +; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0 +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_fmin3_olt_0_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -321,6 +353,36 @@ define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrs ; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0 ; GFX11-NEXT: s_endpgm ; +; GFX1170-LABEL: test_fmin3_olt_1_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-NEXT: s_mov_b32 s10, -1 +; GFX1170-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-NEXT: s_mov_b32 s14, s10 +; GFX1170-NEXT: s_mov_b32 s15, s11 +; GFX1170-NEXT: s_mov_b32 s18, s10 +; GFX1170-NEXT: s_mov_b32 s19, s11 +; GFX1170-NEXT: s_mov_b32 s22, s10 +; GFX1170-NEXT: s_mov_b32 s23, s11 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s2 +; GFX1170-NEXT: s_mov_b32 s13, s3 +; GFX1170-NEXT: s_mov_b32 s16, s4 +; GFX1170-NEXT: s_mov_b32 s17, s5 +; GFX1170-NEXT: s_mov_b32 s20, s6 +; GFX1170-NEXT: s_mov_b32 s21, s7 +; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s8, s0 +; GFX1170-NEXT: s_mov_b32 s9, s1 +; GFX1170-NEXT: v_min3_num_f32 v0, v2, v0, v1 +; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0 +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_fmin3_olt_1_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -552,6 +614,66 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrs ; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-FAKE16-NEXT: s_endpgm ; +; GFX1170-TRUE16-LABEL: test_fmin3_olt_0_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v0.h, v1.l +; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-TRUE16-NEXT: s_endpgm +; +; GFX1170-FAKE16-LABEL: test_fmin3_olt_0_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v2 +; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-FAKE16-NEXT: s_endpgm +; ; GFX12-TRUE16-LABEL: test_fmin3_olt_0_f16: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -845,6 +967,66 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrs ; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-FAKE16-NEXT: s_endpgm ; +; GFX1170-TRUE16-LABEL: test_fmin3_olt_1_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-TRUE16-NEXT: v_min3_num_f16 v0.l, v1.l, v0.l, v0.h +; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-TRUE16-NEXT: s_endpgm +; +; GFX1170-FAKE16-LABEL: test_fmin3_olt_1_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1 +; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11 +; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10 +; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11 +; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2 +; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3 +; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4 +; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5 +; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6 +; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7 +; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0 +; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1 +; GFX1170-FAKE16-NEXT: v_min3_num_f16 v0, v2, v0, v1 +; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 +; GFX1170-FAKE16-NEXT: s_endpgm +; ; GFX12-TRUE16-LABEL: test_fmin3_olt_1_f16: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -1042,6 +1224,15 @@ define <2 x half> @no_fmin3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; GFX11-NEXT: v_pk_min_f16 v0, v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: no_fmin3_v2f16: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v2, v0 +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: no_fmin3_v2f16: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1204,6 +1395,39 @@ define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrs ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0 ; GFX11-NEXT: s_endpgm ; +; GFX1170-LABEL: test_fmin3_olt_0_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-NEXT: s_mov_b32 s10, -1 +; GFX1170-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-NEXT: s_mov_b32 s14, s10 +; GFX1170-NEXT: s_mov_b32 s15, s11 +; GFX1170-NEXT: s_mov_b32 s18, s10 +; GFX1170-NEXT: s_mov_b32 s19, s11 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s2 +; GFX1170-NEXT: s_mov_b32 s13, s3 +; GFX1170-NEXT: s_mov_b32 s16, s4 +; GFX1170-NEXT: s_mov_b32 s17, s5 +; GFX1170-NEXT: buffer_load_b64 v[0:1], off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b64 v[2:3], off, s[16:19], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s6 +; GFX1170-NEXT: s_mov_b32 s13, s7 +; GFX1170-NEXT: s_mov_b32 s8, s0 +; GFX1170-NEXT: buffer_load_b64 v[4:5], off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s9, s1 +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[4:5], v[4:5] +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0 +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_fmin3_olt_0_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -1414,6 +1638,39 @@ define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrs ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0 ; GFX11-NEXT: s_endpgm ; +; GFX1170-LABEL: test_fmin3_olt_1_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1170-NEXT: s_mov_b32 s10, -1 +; GFX1170-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1170-NEXT: s_mov_b32 s14, s10 +; GFX1170-NEXT: s_mov_b32 s15, s11 +; GFX1170-NEXT: s_mov_b32 s18, s10 +; GFX1170-NEXT: s_mov_b32 s19, s11 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s2 +; GFX1170-NEXT: s_mov_b32 s13, s3 +; GFX1170-NEXT: s_mov_b32 s16, s4 +; GFX1170-NEXT: s_mov_b32 s17, s5 +; GFX1170-NEXT: buffer_load_b64 v[0:1], off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: buffer_load_b64 v[2:3], off, s[16:19], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s12, s6 +; GFX1170-NEXT: s_mov_b32 s13, s7 +; GFX1170-NEXT: s_mov_b32 s8, s0 +; GFX1170-NEXT: buffer_load_b64 v[4:5], off, s[12:15], 0 glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_mov_b32 s9, s1 +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[4:5], v[4:5] +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1] +; GFX1170-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0 +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_fmin3_olt_1_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll index e851f1d2e586..7deaa1d0c62b 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s @@ -15,6 +19,11 @@ define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_f32_vv: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f32_vv: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v1 @@ -44,6 +53,11 @@ define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) { ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_f32_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, s0, s1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f32_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_minimum_f32 s0, s0, s1 @@ -63,6 +77,11 @@ define amdgpu_ps float @test_fminimum_f32_vs(float %a, float inreg %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_f32_vs: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, s0 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f32_vs: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, s0 @@ -77,6 +96,11 @@ define amdgpu_ps float @test_fminimum_nnan_f32(float %a, float %b) { ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_nnan_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_nnan_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v1 @@ -94,6 +118,11 @@ define amdgpu_ps float @test_fminimum_nsz_f32(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_nsz_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_nsz_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v1 @@ -108,6 +137,11 @@ define amdgpu_ps float @test_fminimum_signed_zero_f32() { ; GFX9-NEXT: v_bfrev_b32_e32 v0, 1 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_signed_zero_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_bfrev_b32_e32 v0, 1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_signed_zero_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_bfrev_b32_e32 v0, 1 @@ -128,6 +162,12 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v2 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v2f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v2 @@ -169,6 +209,12 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v2f32_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, s0, s2 +; GFX1170-NEXT: v_minimum_f32 v1, s1, s3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v2f32_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_minimum_f32 s0, s0, s2 @@ -195,6 +241,13 @@ define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v3f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v3 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v4 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v5 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v3f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v3 @@ -223,6 +276,14 @@ define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v4f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v4 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v5 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v6 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v7 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v4f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v4 @@ -288,6 +349,26 @@ define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float ; GFX9-NEXT: v_cndmask_b32_e32 v15, v33, v16, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v16f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v16 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v17 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v18 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v19 +; GFX1170-NEXT: v_minimum_f32 v4, v4, v20 +; GFX1170-NEXT: v_minimum_f32 v5, v5, v21 +; GFX1170-NEXT: v_minimum_f32 v6, v6, v22 +; GFX1170-NEXT: v_minimum_f32 v7, v7, v23 +; GFX1170-NEXT: v_minimum_f32 v8, v8, v24 +; GFX1170-NEXT: v_minimum_f32 v9, v9, v25 +; GFX1170-NEXT: v_minimum_f32 v10, v10, v26 +; GFX1170-NEXT: v_minimum_f32 v11, v11, v27 +; GFX1170-NEXT: v_minimum_f32 v12, v12, v28 +; GFX1170-NEXT: v_minimum_f32 v13, v13, v29 +; GFX1170-NEXT: v_minimum_f32 v14, v14, v30 +; GFX1170-NEXT: v_minimum_f32 v15, v15, v31 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v16f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v16 @@ -320,6 +401,26 @@ define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-TRUE16-LABEL: test_fminimum_f16_vv: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-SDAG-FAKE16-LABEL: test_fminimum_f16_vv: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_f16_vv: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_f16_vv: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-TRUE16-LABEL: test_fminimum_f16_vv: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l @@ -364,6 +465,26 @@ define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) { ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-TRUE16-LABEL: test_fminimum_f16_ss: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1 +; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-SDAG-FAKE16-LABEL: test_fminimum_f16_ss: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, s0, s1 +; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_f16_ss: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1 +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_f16_ss: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, s0, s1 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f16_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_minimum_f16 s0, s0, s1 @@ -399,6 +520,11 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v2f16_vv: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v2f16_vv: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 @@ -448,6 +574,11 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x ha ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v2f16_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v2f16_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s1 @@ -490,6 +621,27 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fminimum_v3f16_vv: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_v3f16_vv: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v1.l, v3.l +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_v3f16_vv: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fminimum_v3f16_vv: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_pk_minimum_f16 v0, v0, v2 @@ -567,6 +719,30 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x ha ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fminimum_v3f16_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v0, s0, s2 +; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v1, s1, s3 +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_v3f16_ss: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, s1, s3 +; GFX1170-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, s0, s2 +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, s0 +; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_v3f16_ss: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, s1, s3 +; GFX1170-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, s0, s2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s0 +; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fminimum_v3f16_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_pk_minimum_f16 v0, s0, s2 @@ -624,6 +800,12 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) { ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v4f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v4f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 @@ -706,6 +888,12 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x ha ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v4f16_ss: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, s1, s3 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v4f16_ss: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s2 @@ -734,6 +922,11 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) { ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_f64_vv: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f64_vv: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] @@ -771,6 +964,21 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fminimum_f64_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-LABEL: test_fminimum_f64_ss: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1170-GISEL-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fminimum_f64_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3] @@ -835,6 +1043,27 @@ define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fminimum_v2f64_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7] +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-LABEL: test_fminimum_v2f64_ss: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1170-GISEL-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fminimum_v2f64_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5] @@ -904,6 +1133,14 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double> ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v7, v18, v13, s[4:5] ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_v4f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_v4f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] @@ -992,6 +1229,36 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x ; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, s5 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; +; GFX1170-SDAG-LABEL: test_fminimum_v4f64_ss: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15] +; GFX1170-SDAG-NEXT: ; return to shader part epilog +; +; GFX1170-GISEL-LABEL: test_fminimum_v4f64_ss: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX1170-GISEL-NEXT: ; return to shader part epilog +; ; GFX12-SDAG-LABEL: test_fminimum_v4f64_ss: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9] @@ -1063,6 +1330,21 @@ define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm ; +; GFX1170-LABEL: fminimumi_f32_move_to_valu: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_clause 0x1 +; GFX1170-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v1, v1, v2 +; GFX1170-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1170-NEXT: s_endpgm +; ; GFX12-SDAG-LABEL: fminimumi_f32_move_to_valu: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_clause 0x1 @@ -1143,6 +1425,69 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr ; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm ; +; GFX1170-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: s_clause 0x1 +; GFX1170-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-SDAG-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX1170-SDAG-TRUE16-NEXT: s_endpgm +; +; GFX1170-SDAG-FAKE16-LABEL: fminimum_f16_move_to_valu: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: s_clause 0x1 +; GFX1170-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-SDAG-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v1, v1, v2 +; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX1170-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1170-GISEL-TRUE16-LABEL: fminimum_f16_move_to_valu: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: s_clause 0x1 +; GFX1170-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-GISEL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX1170-GISEL-TRUE16-NEXT: s_endpgm +; +; GFX1170-GISEL-FAKE16-LABEL: fminimum_f16_move_to_valu: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: s_clause 0x1 +; GFX1170-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1170-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s3, v2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, s2, s3 +; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX1170-GISEL-FAKE16-NEXT: s_endpgm +; ; GFX12-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1 @@ -1226,6 +1571,11 @@ define amdgpu_ps float @test_fminimum_f32_ieee_on(float %a, float %b) #0 { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_f32_ieee_on: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f32_ieee_on: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v1 @@ -1243,6 +1593,11 @@ define amdgpu_ps float @test_fminimum_f32_ieee_off(float %a, float %b) #1 { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_fminimum_f32_ieee_off: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_fminimum_f32_ieee_off: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minimum_f32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index 4506fd649a5f..0cd4293a1611 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s @@ -15,6 +17,12 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,6 +58,12 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v2, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -84,6 +98,14 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: s_fminimum3_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_mov_b32_e32 v0, s2 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_minimum3_f32 v0, s0, s1, v0 +; GFX1170-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_fminimum3_f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -126,6 +148,12 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fabs0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fabs0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -162,6 +190,12 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fabs1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, |v1|, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fabs1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,6 +232,12 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fabs2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, |v2| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fabs2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -234,6 +274,12 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -272,6 +318,12 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -310,6 +362,12 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fneg_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fneg_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -351,6 +409,12 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fneg0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, -v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fneg0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -387,6 +451,12 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fneg1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, -v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fneg1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -423,6 +493,12 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_fneg2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, -v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_fneg2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -459,6 +535,12 @@ define float @v_fminimum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_const0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_const0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -495,6 +577,12 @@ define float @v_fminimum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32__const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32__const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -531,6 +619,12 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_inlineimm0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, 4.0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_inlineimm0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -566,6 +660,12 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32__inlineimm: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32__inlineimm: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -603,6 +703,14 @@ define float @v_fminimum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f32_const1_const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: s_mov_b32 s0, 0x41000000 +; GFX1170-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f32_const1_const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -641,6 +749,13 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v4, v0, v2 +; GFX1170-NEXT: v_minimum3_f32 v1, v5, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -686,6 +801,13 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f32_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v2, v4 +; GFX1170-NEXT: v_minimum3_f32 v1, v1, v3, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f32_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -731,6 +853,13 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f32__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v4| +; GFX1170-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f32__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -779,6 +908,13 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f32__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, -v0, -v2, -v4 +; GFX1170-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f32__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -827,6 +963,13 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f32__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, 2.0, v2 +; GFX1170-NEXT: v_minimum3_f32 v1, v1, 2.0, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f32__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -872,6 +1015,13 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f32__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v2, 4.0 +; GFX1170-NEXT: v_minimum3_f32 v1, v1, v3, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f32__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -918,6 +1068,14 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v6, v0, v3 +; GFX1170-NEXT: v_minimum3_f32 v1, v7, v1, v4 +; GFX1170-NEXT: v_minimum3_f32 v2, v8, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f32: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -973,6 +1131,14 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f32_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v3, v6 +; GFX1170-NEXT: v_minimum3_f32 v1, v1, v4, v7 +; GFX1170-NEXT: v_minimum3_f32 v2, v2, v5, v8 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f32_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1028,6 +1194,14 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f32__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v6| +; GFX1170-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v7| +; GFX1170-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f32__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1086,6 +1260,14 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f32__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, -v0, -v3, -v6 +; GFX1170-NEXT: v_minimum3_f32 v1, -v1, -v4, -v7 +; GFX1170-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f32__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1144,6 +1326,14 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f32__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, 2.0, v3 +; GFX1170-NEXT: v_minimum3_f32 v1, v1, 2.0, v4 +; GFX1170-NEXT: v_minimum3_f32 v2, v2, 2.0, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f32__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1199,6 +1389,14 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f32__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum3_f32 v0, v0, v3, 4.0 +; GFX1170-NEXT: v_minimum3_f32 v1, v1, v4, 4.0 +; GFX1170-NEXT: v_minimum3_f32 v2, v2, v5, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f32__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1263,6 +1461,18 @@ define half @v_fminimum3_f16(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1308,6 +1518,18 @@ define half @v_fminimum3_f16_commute(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v2, v0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_commute: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v2.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_commute: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v2, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1355,6 +1577,26 @@ define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg % ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: ; return to shader part epilog ; +; GFX1170-TRUE16-LABEL: s_fminimum3_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, s0, s1, v0.l +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-FAKE16-LABEL: s_fminimum3_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, s0, s1, v0 +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-FAKE16-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_fminimum3_f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -1411,6 +1653,18 @@ define half @v_fminimum3_f16_fabs0(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, |v0.l|, v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fabs0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1458,6 +1712,18 @@ define half @v_fminimum3_f16_fabs1(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, |v1|, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs1: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, |v1.l|, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs1: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, |v1|, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fabs1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1505,6 +1771,18 @@ define half @v_fminimum3_f16_fabs2(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, |v2| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, |v2.l| +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, |v2| +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fabs2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1552,6 +1830,18 @@ define half @v_fminimum3_f16_fabs_all(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, |v1|, |v2| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs_all: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, |v0.l|, |v1.l|, |v2.l| +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs_all: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, |v1|, |v2| +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1603,6 +1893,18 @@ define half @v_fminimum3_f16_fneg_all(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -v0, -v1, -v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg_all: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, -v0.l, -v1.l, -v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg_all: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, -v0, -v1, -v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1654,6 +1956,18 @@ define half @v_fminimum3_f16_fneg_fabs_all(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -|v0|, -|v1|, -|v2| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg_fabs_all: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, -|v0.l|, -|v1.l|, -|v2.l| +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg_fabs_all: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, -|v0|, -|v1|, -|v2| +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fneg_fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1708,6 +2022,18 @@ define half @v_fminimum3_f16_fneg0(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -v0, v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, -v0.l, v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, -v0, v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fneg0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1755,6 +2081,18 @@ define half @v_fminimum3_f16_fneg1(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, -v1, v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg1: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, -v1.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg1: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, -v1, v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fneg1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1802,6 +2140,18 @@ define half @v_fminimum3_f16_fneg2(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, -v2 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, -v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, -v2 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_fneg2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1849,6 +2199,18 @@ define half @v_fminimum3_f16_const0(half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, 0x4800, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_const0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 0x4800, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_const0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, 0x4800, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_const0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1895,6 +2257,18 @@ define half @v_fminimum3_f16__const2(half %a, half %b) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 0x4800 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16__const2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 0x4800 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16__const2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 0x4800 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16__const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1941,6 +2315,18 @@ define half @v_fminimum3_f16_inlineimm0(half %b, half %c) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, 4.0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_inlineimm0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 4.0, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_inlineimm0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, 4.0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_inlineimm0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1986,6 +2372,18 @@ define half @v_fminimum3_f16__inlineimm(half %a, half %b) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 4.0 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16__inlineimm: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 4.0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16__inlineimm: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 4.0 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16__inlineimm: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2035,6 +2433,22 @@ define half @v_fminimum3_f16_const1_const2(half %a) { ; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, s0, 0x4c00 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_fminimum3_f16_const1_const2: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x4800 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 0x4c00 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_fminimum3_f16_const1_const2: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: s_movk_i32 s0, 0x4800 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, s0, 0x4c00 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_f16_const1_const2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2074,6 +2488,14 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c ; GFX12-NEXT: v_pk_minimum_f16 v0, v2, v0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v2, v0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2120,6 +2542,14 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f16_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2169,6 +2599,17 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f16__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 +; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f16__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2222,6 +2663,14 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f16__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f16__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2271,6 +2720,14 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) { ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f16__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f16__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2316,6 +2773,14 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) { ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v2f16__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v2f16__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2364,6 +2829,16 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c ; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v4, v0 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v5, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2426,6 +2901,16 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f16_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2495,6 +2980,23 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f16__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 +; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 +; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f16__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2571,6 +3073,16 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f16__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f16__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2636,6 +3148,16 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f16__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 2.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f16__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2693,6 +3215,16 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v3f16__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v3f16__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2755,6 +3287,16 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c ; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v4f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v4, v0 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v5, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v4f16: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2820,6 +3362,16 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v4f16_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v4f16_commute: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2892,6 +3444,23 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v4f16__fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 +; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 +; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v4f16__fabs_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2969,6 +3538,16 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v4f16__fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v4f16__fneg_all: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3037,6 +3616,16 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v4f16__inlineimm1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 2.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v4f16__inlineimm1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3099,6 +3688,16 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_v4f16__inlineimm2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_fminimum3_v4f16__inlineimm2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3162,6 +3761,14 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3195,6 +3802,14 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[4:5], v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_commute: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[4:5], v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_commute: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3227,6 +3842,16 @@ define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, do ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: s_fminimum3_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], s[4:5] +; GFX1170-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_fminimum3_f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3] @@ -3269,6 +3894,14 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fabs0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], |v[0:1]|, v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fabs0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3303,6 +3936,14 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fabs1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[2:3]| +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fabs1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3337,6 +3978,14 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fabs2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fabs2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3371,6 +4020,14 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], |v[0:1]|, |v[2:3]| +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fabs_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3407,6 +4064,14 @@ define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fneg_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], -v[0:1], -v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fneg_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3443,6 +4108,14 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -|v[4:5]| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fneg_fabs_all: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], -|v[0:1]|, -|v[2:3]| +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -|v[4:5]| +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fneg_fabs_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3482,6 +4155,14 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fneg0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], -v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fneg0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3516,6 +4197,14 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fneg1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fneg1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3550,6 +4239,14 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_fneg2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_fneg2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3584,6 +4281,14 @@ define double @v_fminimum3_f64_const0(double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_const0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_const0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3619,6 +4324,14 @@ define double @v_fminimum3_f64__const2(double %a, double %b) { ; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64__const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64__const2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3653,6 +4366,14 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_inlineimm0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_inlineimm0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3686,6 +4407,14 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) { ; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64__inlineimm: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64__inlineimm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3719,6 +4448,14 @@ define double @v_fminimum3_f64_const1_const2(double %a) { ; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40300000, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_fminimum3_f64_const1_const2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40300000, v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_fminimum3_f64_const1_const2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3755,6 +4492,14 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_minimum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_no_fminimum3_f32__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f32 v1, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_no_fminimum3_f32__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3790,6 +4535,15 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_minimum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: s_no_fminimum3_f32__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minimum_f32 v0, s0, s1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-NEXT: v_minimum_f32 v1, v0, s2 +; GFX1170-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_no_fminimum3_f32__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -3852,6 +4606,23 @@ define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) { ; GFX12-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_no_fminimum3_f16__multi_use: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.h, v0.l, v2.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_no_fminimum3_f16__multi_use: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v1, v0, v2 +; GFX1170-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_no_fminimum3_f16__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3893,6 +4664,30 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half in ; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX12-NEXT: ; return to shader part epilog ; +; GFX1170-TRUE16-LABEL: s_no_fminimum3_f16__multi_use: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v1.l, v0.l, s2 +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1170-FAKE16-LABEL: s_no_fminimum3_f16__multi_use: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, s0, s1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v1, v0, s2 +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1170-FAKE16-NEXT: ; return to shader part epilog +; ; GFX942-LABEL: s_no_fminimum3_f16__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: v_mov_b32_e32 v0, s1 @@ -3948,6 +4743,14 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, ; GFX12-NEXT: v_pk_minimum_f16 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_no_fminimum3_v2f16__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_pk_minimum_f16 v1, v0, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX942-LABEL: v_no_fminimum3_v2f16__multi_use: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3997,6 +4800,14 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double ; GFX12-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_no_fminimum3_f64__multi_use: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_no_fminimum3_f64__multi_use: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll index 48f6c96df139..f192ee28dcec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s define half @v_maximum_f16(half %src0, half %src1) { ; GFX7-LABEL: v_maximum_f16: @@ -72,6 +74,18 @@ define half @v_maximum_f16(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximum_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximum_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximum_f16: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -135,6 +149,18 @@ define half @v_maximum_f16__nnan(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximum_f16__nnan: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -221,6 +247,18 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximum_f16__nsz: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximum_f16__nsz: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximum_f16__nsz: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -284,6 +322,18 @@ define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_nsz: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_nsz: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_nsz: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -379,6 +429,22 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_src0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_src0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src0: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -479,6 +545,22 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_src1: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_src1: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src1: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -601,6 +683,28 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) { ; GFX11-FAKE16-NEXT: ;;#ASMEND ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: s_maximum_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-TRUE16-NEXT: ;;#ASMSTART +; GFX1170-TRUE16-NEXT: ; use v0 +; GFX1170-TRUE16-NEXT: ;;#ASMEND +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: s_maximum_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, s0, s1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-FAKE16-NEXT: ;;#ASMSTART +; GFX1170-FAKE16-NEXT: ; use v0 +; GFX1170-FAKE16-NEXT: ;;#ASMEND +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_maximum_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -720,6 +824,12 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -777,6 +887,12 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) { ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f16__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f16__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -886,6 +1002,12 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f16__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f16__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -943,6 +1065,12 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f16__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f16__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1089,6 +1217,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) { ; GFX11-FAKE16-NEXT: ;;#ASMEND ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_maximum_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s1 +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v0 +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_maximum_v2f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1227,6 +1364,13 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1293,6 +1437,13 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) { ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f16__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f16__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1427,6 +1578,13 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f16__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f16__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1493,6 +1651,13 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f16__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f16__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1658,6 +1823,13 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1734,6 +1906,13 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) { ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f16__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f16__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1899,6 +2078,13 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f16__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f16__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1975,6 +2161,13 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f16__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f16__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2251,6 +2444,15 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v3, v3, v9, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v8f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-NEXT: v_pk_maximum_f16 v2, v2, v6 +; GFX1170-NEXT: v_pk_maximum_f16 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v8f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2755,6 +2957,19 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v4, v4, v14, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v16f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v8 +; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v9 +; GFX1170-NEXT: v_pk_maximum_f16 v2, v2, v10 +; GFX1170-NEXT: v_pk_maximum_f16 v3, v3, v11 +; GFX1170-NEXT: v_pk_maximum_f16 v4, v4, v12 +; GFX1170-NEXT: v_pk_maximum_f16 v5, v5, v13 +; GFX1170-NEXT: v_pk_maximum_f16 v6, v6, v14 +; GFX1170-NEXT: v_pk_maximum_f16 v7, v7, v15 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v16f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2774,5 +2989,3 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) { %op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1) ret <16 x half> %op } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll index 97eafd07d4b3..505e8c36b56d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s define float @v_maximum_f32(float %src0, float %src1) { ; GFX7-LABEL: v_maximum_f32: @@ -59,6 +59,12 @@ define float @v_maximum_f32(float %src0, float %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -103,6 +109,12 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -167,6 +179,12 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -211,6 +229,12 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -281,6 +305,14 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f32__nnan_src0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f32__nnan_src0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -354,6 +386,14 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f32__nnan_src1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f32_e32 v1, 1.0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f32__nnan_src1: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -443,6 +483,15 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_maximum_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, s0, s1 +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v0 +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_maximum_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -527,6 +576,13 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v2 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -576,6 +632,13 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v2 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -656,6 +719,13 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v2 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -705,6 +775,13 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v2 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -812,6 +889,16 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_maximum_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v1, s1, s3 +; GFX1170-NEXT: v_maximum_f32 v0, s0, s2 +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v[0:1] +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_maximum_v2f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -913,6 +1000,14 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v3 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v4 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -968,6 +1063,14 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v3 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v4 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1065,6 +1168,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v3 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v4 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1120,6 +1231,14 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v3 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v4 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1233,6 +1352,15 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v4 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v5 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v6 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1293,6 +1421,15 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v4 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v5 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v6 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1407,6 +1544,15 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v4 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v5 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v6 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1467,6 +1613,15 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v4 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v5 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v6 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1643,6 +1798,19 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v8f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v8 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v9 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v10 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v11 +; GFX1170-NEXT: v_maximum_f32 v4, v4, v12 +; GFX1170-NEXT: v_maximum_f32 v5, v5, v13 +; GFX1170-NEXT: v_maximum_f32 v6, v6, v14 +; GFX1170-NEXT: v_maximum_f32 v7, v7, v15 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v8f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1960,6 +2128,29 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v16f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: v_maximum_f32 v0, v0, v16 +; GFX1170-NEXT: v_maximum_f32 v1, v1, v17 +; GFX1170-NEXT: v_maximum_f32 v2, v2, v18 +; GFX1170-NEXT: v_maximum_f32 v3, v3, v19 +; GFX1170-NEXT: v_maximum_f32 v4, v4, v20 +; GFX1170-NEXT: v_maximum_f32 v5, v5, v21 +; GFX1170-NEXT: v_maximum_f32 v6, v6, v22 +; GFX1170-NEXT: v_maximum_f32 v7, v7, v23 +; GFX1170-NEXT: v_maximum_f32 v8, v8, v24 +; GFX1170-NEXT: v_maximum_f32 v9, v9, v25 +; GFX1170-NEXT: v_maximum_f32 v10, v10, v26 +; GFX1170-NEXT: v_maximum_f32 v11, v11, v27 +; GFX1170-NEXT: v_maximum_f32 v12, v12, v28 +; GFX1170-NEXT: v_maximum_f32 v13, v13, v29 +; GFX1170-NEXT: v_maximum_f32 v14, v14, v30 +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v15, v15, v31 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v16f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1989,5 +2180,3 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { %op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1) ret <16 x float> %op } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index 3280d7aa9ddf..a98f22fdf72f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s define double @v_maximum_f64(double %src0, double %src1) { ; GFX7-LABEL: v_maximum_f64: @@ -69,6 +69,12 @@ define double @v_maximum_f64(double %src0, double %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -113,6 +119,12 @@ define double @v_maximum_f64__nnan(double %src0, double %src1) { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -187,6 +199,12 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -231,6 +249,12 @@ define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -312,6 +336,14 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f64__nnan_src0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f64__nnan_src0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -396,6 +428,14 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_f64__nnan_src1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_f64__nnan_src1: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -498,6 +538,15 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_maximum_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v[0:1] +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_maximum_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -601,6 +650,13 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -651,6 +707,13 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -751,6 +814,13 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -801,6 +871,13 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double> ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v2f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v2f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -932,6 +1009,16 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_maximum_v2f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[2:3], s[2:3], s[18:19] +; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[16:17] +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v[0:3] +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_maximum_v2f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1060,6 +1147,14 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1116,6 +1211,14 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1241,6 +1344,14 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1297,6 +1408,14 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double> ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v3f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v3f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1447,6 +1566,15 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1509,6 +1637,15 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1660,6 +1797,15 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1722,6 +1868,15 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double> ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v4f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v4f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1982,6 +2137,21 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v8f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17] +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[18:19] +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[20:21] +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[22:23] +; GFX1170-NEXT: v_maximum_f64 v[8:9], v[8:9], v[24:25] +; GFX1170-NEXT: v_maximum_f64 v[10:11], v[10:11], v[26:27] +; GFX1170-NEXT: v_maximum_f64 v[12:13], v[12:13], v[28:29] +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[14:15], v[14:15], v[30:31] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v8f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2757,6 +2927,79 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximum_v16f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: s_clause 0x1b +; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:8 +; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:4 +; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:16 +; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:12 +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: scratch_load_b32 v37, off, s32 offset:120 +; GFX1170-NEXT: scratch_load_b32 v39, off, s32 offset:104 +; GFX1170-NEXT: scratch_load_b32 v49, off, s32 offset:24 +; GFX1170-NEXT: scratch_load_b32 v48, off, s32 offset:20 +; GFX1170-NEXT: scratch_load_b32 v51, off, s32 offset:32 +; GFX1170-NEXT: scratch_load_b32 v50, off, s32 offset:28 +; GFX1170-NEXT: scratch_load_b32 v53, off, s32 offset:40 +; GFX1170-NEXT: scratch_load_b32 v52, off, s32 offset:36 +; GFX1170-NEXT: scratch_load_b32 v55, off, s32 offset:48 +; GFX1170-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX1170-NEXT: scratch_load_b32 v65, off, s32 offset:56 +; GFX1170-NEXT: scratch_load_b32 v64, off, s32 offset:52 +; GFX1170-NEXT: scratch_load_b32 v67, off, s32 offset:64 +; GFX1170-NEXT: scratch_load_b32 v66, off, s32 offset:60 +; GFX1170-NEXT: scratch_load_b32 v69, off, s32 offset:72 +; GFX1170-NEXT: scratch_load_b32 v68, off, s32 offset:68 +; GFX1170-NEXT: scratch_load_b32 v71, off, s32 offset:80 +; GFX1170-NEXT: scratch_load_b32 v70, off, s32 offset:76 +; GFX1170-NEXT: scratch_load_b32 v81, off, s32 offset:88 +; GFX1170-NEXT: scratch_load_b32 v80, off, s32 offset:84 +; GFX1170-NEXT: scratch_load_b32 v83, off, s32 offset:96 +; GFX1170-NEXT: scratch_load_b32 v82, off, s32 offset:92 +; GFX1170-NEXT: scratch_load_b32 v38, off, s32 offset:100 +; GFX1170-NEXT: s_waitcnt vmcnt(26) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33] +; GFX1170-NEXT: s_clause 0x2 +; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:112 +; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:108 +; GFX1170-NEXT: scratch_load_b32 v36, off, s32 offset:116 +; GFX1170-NEXT: s_waitcnt vmcnt(27) +; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35] +; GFX1170-NEXT: s_clause 0x1 +; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:128 +; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:124 +; GFX1170-NEXT: s_waitcnt vmcnt(24) +; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[48:49] +; GFX1170-NEXT: s_waitcnt vmcnt(22) +; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[50:51] +; GFX1170-NEXT: s_waitcnt vmcnt(20) +; GFX1170-NEXT: v_maximum_f64 v[8:9], v[8:9], v[52:53] +; GFX1170-NEXT: s_waitcnt vmcnt(18) +; GFX1170-NEXT: v_maximum_f64 v[10:11], v[10:11], v[54:55] +; GFX1170-NEXT: s_waitcnt vmcnt(16) +; GFX1170-NEXT: v_maximum_f64 v[12:13], v[12:13], v[64:65] +; GFX1170-NEXT: s_waitcnt vmcnt(14) +; GFX1170-NEXT: v_maximum_f64 v[14:15], v[14:15], v[66:67] +; GFX1170-NEXT: s_waitcnt vmcnt(12) +; GFX1170-NEXT: v_maximum_f64 v[16:17], v[16:17], v[68:69] +; GFX1170-NEXT: s_waitcnt vmcnt(10) +; GFX1170-NEXT: v_maximum_f64 v[18:19], v[18:19], v[70:71] +; GFX1170-NEXT: s_waitcnt vmcnt(8) +; GFX1170-NEXT: v_maximum_f64 v[20:21], v[20:21], v[80:81] +; GFX1170-NEXT: s_waitcnt vmcnt(6) +; GFX1170-NEXT: v_maximum_f64 v[22:23], v[22:23], v[82:83] +; GFX1170-NEXT: s_waitcnt vmcnt(5) +; GFX1170-NEXT: v_maximum_f64 v[24:25], v[24:25], v[38:39] +; GFX1170-NEXT: s_waitcnt vmcnt(3) +; GFX1170-NEXT: v_maximum_f64 v[26:27], v[26:27], v[32:33] +; GFX1170-NEXT: s_waitcnt vmcnt(2) +; GFX1170-NEXT: v_maximum_f64 v[28:29], v[28:29], v[36:37] +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[30:31], v[30:31], v[34:35] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximum_v16f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2834,5 +3077,3 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) %op = call <16 x double> @llvm.maximum.v16f64(<16 x double> %src0, <16 x double> %src1) ret <16 x double> %op } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll index b5dab396f0bf..9a1a51cc5dfc 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll @@ -1,14 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s +; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s define half @v_minimum_f16(half %src0, half %src1) { ; GFX8-LABEL: v_minimum_f16: @@ -61,6 +62,18 @@ define half @v_minimum_f16(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimum_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimum_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimum_f16: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -115,6 +128,18 @@ define half @v_minimum_f16__nnan(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimum_f16__nnan: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -189,6 +214,18 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimum_f16__nsz: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimum_f16__nsz: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimum_f16__nsz: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -243,6 +280,18 @@ define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) { ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_nsz: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_nsz: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_nsz: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -323,6 +372,22 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_src0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_src0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src0: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -408,6 +473,22 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_src1: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_src1: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src1: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -515,6 +596,28 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) { ; GFX11-FAKE16-NEXT: ;;#ASMEND ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: s_minimum_f16: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-TRUE16-NEXT: ;;#ASMSTART +; GFX1170-TRUE16-NEXT: ; use v0 +; GFX1170-TRUE16-NEXT: ;;#ASMEND +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: s_minimum_f16: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, s0, s1 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1170-FAKE16-NEXT: ;;#ASMSTART +; GFX1170-FAKE16-NEXT: ; use v0 +; GFX1170-FAKE16-NEXT: ;;#ASMEND +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_minimum_f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -612,6 +715,12 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -652,6 +761,12 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) { ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f16__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f16__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -739,6 +854,12 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f16__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f16__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -779,6 +900,12 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f16__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f16__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -900,6 +1027,15 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) { ; GFX11-FAKE16-NEXT: ;;#ASMEND ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_minimum_v2f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s1 +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v0 +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_minimum_v2f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1010,6 +1146,13 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1055,6 +1198,13 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) { ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f16__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f16__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1161,6 +1311,13 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) { ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f16__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f16__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1206,6 +1363,13 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f16__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f16__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1333,6 +1497,13 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1380,6 +1551,13 @@ define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) { ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f16__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f16__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1507,6 +1685,13 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f16__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f16__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1554,6 +1739,13 @@ define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f16__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f16__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1760,6 +1952,15 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v3, v3, v9, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v8f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-NEXT: v_pk_minimum_f16 v2, v2, v6 +; GFX1170-NEXT: v_pk_minimum_f16 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v8f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2130,6 +2331,19 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) { ; GFX11-FAKE16-NEXT: v_perm_b32 v4, v4, v14, 0x5040100 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v16f16: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v8 +; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v9 +; GFX1170-NEXT: v_pk_minimum_f16 v2, v2, v10 +; GFX1170-NEXT: v_pk_minimum_f16 v3, v3, v11 +; GFX1170-NEXT: v_pk_minimum_f16 v4, v4, v12 +; GFX1170-NEXT: v_pk_minimum_f16 v5, v5, v13 +; GFX1170-NEXT: v_pk_minimum_f16 v6, v6, v14 +; GFX1170-NEXT: v_pk_minimum_f16 v7, v7, v15 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v16f16: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2149,5 +2363,3 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) { %op = call <16 x half> @llvm.minimum.v16f16(<16 x half> %src0, <16 x half> %src1) ret <16 x half> %op } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll index 3e98599fc4c7..269f10823ddb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s define float @v_minimum_f32(float %src0, float %src1) { ; GFX7-LABEL: v_minimum_f32: @@ -59,6 +59,12 @@ define float @v_minimum_f32(float %src0, float %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -103,6 +109,12 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -167,6 +179,12 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -211,6 +229,12 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -281,6 +305,14 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f32__nnan_src0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f32__nnan_src0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -354,6 +386,14 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f32__nnan_src1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f32_e32 v1, 1.0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f32__nnan_src1: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -443,6 +483,15 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_minimum_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, s0, s1 +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v0 +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_minimum_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -527,6 +576,13 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v2 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -576,6 +632,13 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) ; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v2 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -656,6 +719,13 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v2 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -705,6 +775,13 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr ; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v2 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -812,6 +889,16 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_minimum_v2f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v1, s1, s3 +; GFX1170-NEXT: v_minimum_f32 v0, s0, s2 +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v[0:1] +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_minimum_v2f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -913,6 +1000,14 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v3 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v4 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -968,6 +1063,14 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) ; GFX11-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v3 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v4 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1065,6 +1168,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v3 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v4 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1120,6 +1231,14 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr ; GFX11-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v3 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v4 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1233,6 +1352,15 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v4 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v5 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v6 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1293,6 +1421,15 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) ; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f32__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v4 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v5 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v6 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f32__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1407,6 +1544,15 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f32__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v4 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v5 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v6 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f32__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1467,6 +1613,15 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr ; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f32__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v4 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v5 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v6 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f32__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1643,6 +1798,19 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v8f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v8 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v9 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v10 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v11 +; GFX1170-NEXT: v_minimum_f32 v4, v4, v12 +; GFX1170-NEXT: v_minimum_f32 v5, v5, v13 +; GFX1170-NEXT: v_minimum_f32 v6, v6, v14 +; GFX1170-NEXT: v_minimum_f32 v7, v7, v15 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v8f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1960,6 +2128,29 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v16f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: v_minimum_f32 v0, v0, v16 +; GFX1170-NEXT: v_minimum_f32 v1, v1, v17 +; GFX1170-NEXT: v_minimum_f32 v2, v2, v18 +; GFX1170-NEXT: v_minimum_f32 v3, v3, v19 +; GFX1170-NEXT: v_minimum_f32 v4, v4, v20 +; GFX1170-NEXT: v_minimum_f32 v5, v5, v21 +; GFX1170-NEXT: v_minimum_f32 v6, v6, v22 +; GFX1170-NEXT: v_minimum_f32 v7, v7, v23 +; GFX1170-NEXT: v_minimum_f32 v8, v8, v24 +; GFX1170-NEXT: v_minimum_f32 v9, v9, v25 +; GFX1170-NEXT: v_minimum_f32 v10, v10, v26 +; GFX1170-NEXT: v_minimum_f32 v11, v11, v27 +; GFX1170-NEXT: v_minimum_f32 v12, v12, v28 +; GFX1170-NEXT: v_minimum_f32 v13, v13, v29 +; GFX1170-NEXT: v_minimum_f32 v14, v14, v30 +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v15, v15, v31 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v16f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1989,5 +2180,3 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { %op = call <16 x float> @llvm.minimum.v16f32(<16 x float> %src0, <16 x float> %src1) ret <16 x float> %op } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index d07bd6c8dd90..1d7678779b8b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s define double @v_minimum_f64(double %src0, double %src1) { ; GFX7-LABEL: v_minimum_f64: @@ -69,6 +69,12 @@ define double @v_minimum_f64(double %src0, double %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -113,6 +119,12 @@ define double @v_minimum_f64__nnan(double %src0, double %src1) { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -187,6 +199,12 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -231,6 +249,12 @@ define double @v_minimum_f64__nnan_nsz(double %src0, double %src1) { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -312,6 +336,14 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f64__nnan_src0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f64__nnan_src0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -396,6 +428,14 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_f64__nnan_src1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_f64__nnan_src1: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -498,6 +538,15 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_minimum_f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3] +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v[0:1] +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_minimum_f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -601,6 +650,13 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -651,6 +707,13 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src ; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -751,6 +814,13 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -801,6 +871,13 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double> ; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v2f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v2f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -932,6 +1009,16 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: s_minimum_v2f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[2:3], s[2:3], s[18:19] +; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[16:17] +; GFX1170-NEXT: ;;#ASMSTART +; GFX1170-NEXT: ; use v[0:3] +; GFX1170-NEXT: ;;#ASMEND +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: s_minimum_v2f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1060,6 +1147,14 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1116,6 +1211,14 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src ; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1241,6 +1344,14 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1297,6 +1408,14 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double> ; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v3f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v3f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1447,6 +1566,15 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1509,6 +1637,15 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src ; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f64__nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f64__nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1660,6 +1797,15 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f64__nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f64__nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1722,6 +1868,15 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double> ; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v4f64__nnan_nsz: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v4f64__nnan_nsz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1982,6 +2137,21 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v8f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[16:17] +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[18:19] +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[20:21] +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[22:23] +; GFX1170-NEXT: v_minimum_f64 v[8:9], v[8:9], v[24:25] +; GFX1170-NEXT: v_minimum_f64 v[10:11], v[10:11], v[26:27] +; GFX1170-NEXT: v_minimum_f64 v[12:13], v[12:13], v[28:29] +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[14:15], v[14:15], v[30:31] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v8f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2757,6 +2927,79 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimum_v16f64: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: s_clause 0x1b +; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:8 +; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:4 +; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:16 +; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:12 +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: scratch_load_b32 v37, off, s32 offset:120 +; GFX1170-NEXT: scratch_load_b32 v39, off, s32 offset:104 +; GFX1170-NEXT: scratch_load_b32 v49, off, s32 offset:24 +; GFX1170-NEXT: scratch_load_b32 v48, off, s32 offset:20 +; GFX1170-NEXT: scratch_load_b32 v51, off, s32 offset:32 +; GFX1170-NEXT: scratch_load_b32 v50, off, s32 offset:28 +; GFX1170-NEXT: scratch_load_b32 v53, off, s32 offset:40 +; GFX1170-NEXT: scratch_load_b32 v52, off, s32 offset:36 +; GFX1170-NEXT: scratch_load_b32 v55, off, s32 offset:48 +; GFX1170-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX1170-NEXT: scratch_load_b32 v65, off, s32 offset:56 +; GFX1170-NEXT: scratch_load_b32 v64, off, s32 offset:52 +; GFX1170-NEXT: scratch_load_b32 v67, off, s32 offset:64 +; GFX1170-NEXT: scratch_load_b32 v66, off, s32 offset:60 +; GFX1170-NEXT: scratch_load_b32 v69, off, s32 offset:72 +; GFX1170-NEXT: scratch_load_b32 v68, off, s32 offset:68 +; GFX1170-NEXT: scratch_load_b32 v71, off, s32 offset:80 +; GFX1170-NEXT: scratch_load_b32 v70, off, s32 offset:76 +; GFX1170-NEXT: scratch_load_b32 v81, off, s32 offset:88 +; GFX1170-NEXT: scratch_load_b32 v80, off, s32 offset:84 +; GFX1170-NEXT: scratch_load_b32 v83, off, s32 offset:96 +; GFX1170-NEXT: scratch_load_b32 v82, off, s32 offset:92 +; GFX1170-NEXT: scratch_load_b32 v38, off, s32 offset:100 +; GFX1170-NEXT: s_waitcnt vmcnt(26) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[32:33] +; GFX1170-NEXT: s_clause 0x2 +; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:112 +; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:108 +; GFX1170-NEXT: scratch_load_b32 v36, off, s32 offset:116 +; GFX1170-NEXT: s_waitcnt vmcnt(27) +; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[34:35] +; GFX1170-NEXT: s_clause 0x1 +; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:128 +; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:124 +; GFX1170-NEXT: s_waitcnt vmcnt(24) +; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[48:49] +; GFX1170-NEXT: s_waitcnt vmcnt(22) +; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[50:51] +; GFX1170-NEXT: s_waitcnt vmcnt(20) +; GFX1170-NEXT: v_minimum_f64 v[8:9], v[8:9], v[52:53] +; GFX1170-NEXT: s_waitcnt vmcnt(18) +; GFX1170-NEXT: v_minimum_f64 v[10:11], v[10:11], v[54:55] +; GFX1170-NEXT: s_waitcnt vmcnt(16) +; GFX1170-NEXT: v_minimum_f64 v[12:13], v[12:13], v[64:65] +; GFX1170-NEXT: s_waitcnt vmcnt(14) +; GFX1170-NEXT: v_minimum_f64 v[14:15], v[14:15], v[66:67] +; GFX1170-NEXT: s_waitcnt vmcnt(12) +; GFX1170-NEXT: v_minimum_f64 v[16:17], v[16:17], v[68:69] +; GFX1170-NEXT: s_waitcnt vmcnt(10) +; GFX1170-NEXT: v_minimum_f64 v[18:19], v[18:19], v[70:71] +; GFX1170-NEXT: s_waitcnt vmcnt(8) +; GFX1170-NEXT: v_minimum_f64 v[20:21], v[20:21], v[80:81] +; GFX1170-NEXT: s_waitcnt vmcnt(6) +; GFX1170-NEXT: v_minimum_f64 v[22:23], v[22:23], v[82:83] +; GFX1170-NEXT: s_waitcnt vmcnt(5) +; GFX1170-NEXT: v_minimum_f64 v[24:25], v[24:25], v[38:39] +; GFX1170-NEXT: s_waitcnt vmcnt(3) +; GFX1170-NEXT: v_minimum_f64 v[26:27], v[26:27], v[32:33] +; GFX1170-NEXT: s_waitcnt vmcnt(2) +; GFX1170-NEXT: v_minimum_f64 v[28:29], v[28:29], v[36:37] +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[30:31], v[30:31], v[34:35] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimum_v16f64: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2834,5 +3077,3 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) %op = call <16 x double> @llvm.minimum.v16f64(<16 x double> %src0, <16 x double> %src1) ret <16 x double> %op } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll index 806d941ac873..54262139da7c 100644 --- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll +++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll @@ -20,6 +20,12 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-SDAG,GFX1170-TRUE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-GISEL,GFX1170-TRUE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-SDAG,GFX1170-FAKE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-GISEL,GFX1170-FAKE16-GISEL %s + ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s @@ -129,6 +135,42 @@ define half @v_maximumnum_f16(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -233,6 +275,18 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) { ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximumnum_f16_nnan: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximumnum_f16_nnan: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximumnum_f16_nnan: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -302,6 +356,22 @@ define half @v_maximumnum_f16_1.0(half %x) { ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, 1.0, v0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximumnum_f16_1.0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, 1.0, v0.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximumnum_f16_1.0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, 1.0, v0 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximumnum_f16_1.0: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -410,6 +480,22 @@ define float @v_maximumnum_f32(float %x, float %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -468,6 +554,12 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -564,6 +656,24 @@ define double @v_maximumnum_f64(double %x, double %y) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -624,6 +734,12 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -674,6 +790,14 @@ define float @v_maximumnum_f32_1.0(float %x) { ; GFX11-NEXT: v_max_f32_e32 v0, 1.0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f32_1.0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f32_e32 v0, 1.0, v0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f32_1.0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -730,6 +854,14 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f32_rhs_not_snan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f32_rhs_not_snan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -787,6 +919,14 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f32_lhs_not_snan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f32_lhs_not_snan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -844,6 +984,14 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f32_both_operands_not_snan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f32_both_operands_not_snan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -898,6 +1046,14 @@ define double @v_maximumnum_f64_1.0(double %x) { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], 1.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f64_1.0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], 1.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f64_1.0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1032,6 +1188,42 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v1, v0 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v1, v0 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1206,6 +1398,42 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1380,6 +1608,42 @@ define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1 +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0 +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1 +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1534,6 +1798,24 @@ define float @v_maximumnum_f32_s_v(float inreg %x, float %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v1, v0 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_s_v: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1662,6 +1944,24 @@ define float @v_maximumnum_f32_v_s(float %x, float inreg %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_v_s: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1790,6 +2090,24 @@ define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1 +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_s_s: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1901,6 +2219,15 @@ define double @v_maximumnum_f64_s_v(double inreg %x, double %y) { ; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f64_s_v: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1] +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[2:3], v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f64_s_v: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1999,6 +2326,15 @@ define double @v_maximumnum_f64_v_s(double %x, double inreg %y) { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f64_v_s: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1] +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f64_v_s: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2114,6 +2450,24 @@ define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], s[2:3], s[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[2:3], v[0:1] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], s[0:1], s[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], s[2:3], s[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f64_s_s: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2226,6 +2580,24 @@ define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2339,6 +2711,24 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2453,6 +2843,24 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2567,6 +2975,24 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2701,6 +3127,42 @@ define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2860,6 +3322,42 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3020,6 +3518,42 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3180,6 +3714,42 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3320,6 +3890,24 @@ define double @v_maximumnum_f64_fneg(double %x, double %y) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f64_fneg: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3478,6 +4066,24 @@ define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v2f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v2f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v2f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3576,6 +4182,12 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v2f16_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v2f16_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3742,6 +4354,30 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v3f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v3f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v3f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3866,6 +4502,13 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v3f16_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v3f16_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4069,6 +4712,30 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v4f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v4f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v4f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4206,6 +4873,13 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v4f16_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v4f16_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4471,6 +5145,36 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v5 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v6f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v5 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v6f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v5 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v6f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4825,6 +5529,42 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v7 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v8f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v6 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v8f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v6 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v7 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v8f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -5431,6 +6171,64 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v12 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v16f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v11 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v12 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v16f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v11, v11 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v10 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v12, v12 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v13, v13 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v14, v14 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v15, v15 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v10 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v11 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v12 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v16f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6616,6 +7414,118 @@ define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v16 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v32f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v16 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v17 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v18 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v19 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v20 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v21 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v22 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v23 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v24 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v25 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v26 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v27 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v28 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v29 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v30 +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v16 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v32f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v16 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v17 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v18 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v19 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v20 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v21 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v22 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v23 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v24 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v25 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v26 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v27 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v28 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v29 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v30 +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v16 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v32f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6846,6 +7756,24 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v2f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v2f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v2f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6910,6 +7838,12 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { ; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v2f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v2f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7058,6 +7992,28 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v3f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4 +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v2, v2, v5 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v3f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v2, v2, v5 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v3f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7131,6 +8087,13 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { ; GFX11-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v3f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4 +; GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v3f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7306,6 +8269,30 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) { ; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v4f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v4f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v4f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7385,6 +8372,13 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { ; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v4f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5 +; GFX1170-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v4f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7534,6 +8528,30 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) { ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v2f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v2f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v2f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7605,6 +8623,13 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v2f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v2f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7790,6 +8815,36 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) { ; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v3f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v3f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v3f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7872,6 +8927,14 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v3f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_max_num_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v3f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8094,6 +9157,42 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) { ; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v4f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v4f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v4f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8187,6 +9286,15 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v4f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_max_num_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_max_num_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v4f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8252,6 +9360,42 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 { ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8356,6 +9500,18 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 { ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_maximumnum_f16_nan_no_ieee: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_maximumnum_f16_nan_no_ieee: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_maximumnum_f16_nan_no_ieee: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8410,6 +9566,22 @@ define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f32_no_ieee: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8468,6 +9640,12 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 { ; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f32_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f32_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8512,6 +9690,24 @@ define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_f64_no_ieee: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8572,6 +9768,12 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 { ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_f64_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_f64_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8654,6 +9856,24 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8752,6 +9972,12 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v2f16_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8854,6 +10080,13 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v3f16_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8970,6 +10203,13 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) ; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_maximumnum_v4f16_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_maximumnum_v4f16_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll index c246b9d97e75..8489193b1f58 100644 --- a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll +++ b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll @@ -1,42 +1,67 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-FAKE16 %s -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-TRUE16 %s -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-FAKE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-FAKE16 %s define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) { -; GFX12-LABEL: test_minmax_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2 -; GFX12-NEXT: ; return to shader part epilog +; GCN-LABEL: test_minmax_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximumminimum_f32 v0, v0, v1, v2 +; GCN-NEXT: ; return to shader part epilog %max = call float @llvm.maximum.f32(float %a, float %b) %minmax = call float @llvm.minimum.f32(float %max, float %c) ret float %minmax } define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) { -; SDAG-LABEL: s_test_minmax_f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_maximum_f32 s0, s0, s1 -; SDAG-NEXT: s_mov_b32 s5, s4 -; SDAG-NEXT: s_mov_b32 s4, s3 -; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) -; SDAG-NEXT: s_minimum_f32 s0, s0, s2 -; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 -; SDAG-NEXT: global_store_b32 v0, v1, s[4:5] -; SDAG-NEXT: s_endpgm +; GFX1170-SDAG-LABEL: s_test_minmax_f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: v_maximum_f32 v0, s0, s1 +; GFX1170-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-SDAG-NEXT: s_mov_b32 s5, s4 +; GFX1170-SDAG-NEXT: s_mov_b32 s4, s3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, s2 +; GFX1170-SDAG-NEXT: global_store_b32 v1, v0, s[4:5] +; GFX1170-SDAG-NEXT: s_endpgm ; -; GISEL-LABEL: s_test_minmax_f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_maximum_f32 s0, s0, s1 -; GISEL-NEXT: s_mov_b32 s6, s3 -; GISEL-NEXT: s_mov_b32 s7, s4 -; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_minimum_f32 s0, s0, s2 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) -; GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GISEL-NEXT: global_store_b32 v1, v0, s[6:7] -; GISEL-NEXT: s_endpgm +; GFX1170-GISEL-LABEL: s_test_minmax_f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 +; GFX1170-GISEL-NEXT: s_mov_b32 s6, s3 +; GFX1170-GISEL-NEXT: s_mov_b32 s7, s4 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_maximumminimum_f32 v0, s0, s1, v0 +; GFX1170-GISEL-NEXT: global_store_b32 v1, v0, s[6:7] +; GFX1170-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: s_test_minmax_f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_maximum_f32 s0, s0, s1 +; GFX12-SDAG-NEXT: s_mov_b32 s5, s4 +; GFX12-SDAG-NEXT: s_mov_b32 s4, s3 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX12-SDAG-NEXT: s_minimum_f32 s0, s0, s2 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[4:5] +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: s_test_minmax_f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_maximum_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_mov_b32 s6, s3 +; GFX12-GISEL-NEXT: s_mov_b32 s7, s4 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-GISEL-NEXT: s_minimum_f32 s0, s0, s2 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[6:7] +; GFX12-GISEL-NEXT: s_endpgm %smax = call float @llvm.maximum.f32(float %a, float %b) %sminmax = call float @llvm.minimum.f32(float %smax, float %c) store float %sminmax, ptr addrspace(1) %out @@ -44,157 +69,230 @@ define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float i } define amdgpu_ps float @test_minmax_commuted_f32(float %a, float %b, float %c) { -; GFX12-LABEL: test_minmax_commuted_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2 -; GFX12-NEXT: ; return to shader part epilog +; GCN-LABEL: test_minmax_commuted_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximumminimum_f32 v0, v0, v1, v2 +; GCN-NEXT: ; return to shader part epilog %max = call float @llvm.maximum.f32(float %a, float %b) %minmax = call float @llvm.minimum.f32(float %c, float %max) ret float %minmax } define amdgpu_ps float @test_maxmin_f32(float %a, float %b, float %c) { -; GFX12-LABEL: test_maxmin_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2 -; GFX12-NEXT: ; return to shader part epilog +; GCN-LABEL: test_maxmin_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimummaximum_f32 v0, v0, v1, v2 +; GCN-NEXT: ; return to shader part epilog %min = call float @llvm.minimum.f32(float %a, float %b) %maxmin = call float @llvm.maximum.f32(float %min, float %c) ret float %maxmin } define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) { -; GFX12-LABEL: test_maxmin_commuted_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2 -; GFX12-NEXT: ; return to shader part epilog +; GCN-LABEL: test_maxmin_commuted_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimummaximum_f32 v0, v0, v1, v2 +; GCN-NEXT: ; return to shader part epilog %min = call float @llvm.minimum.f32(float %a, float %b) %maxmin = call float @llvm.maximum.f32(float %c, float %min) ret float %maxmin } define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) { -; SDAG-TRUE16-LABEL: test_minmax_f16: -; SDAG-TRUE16: ; %bb.0: -; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l -; SDAG-TRUE16-NEXT: ; return to shader part epilog +; TRUE16-LABEL: test_minmax_f16: +; TRUE16: ; %bb.0: +; TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l +; TRUE16-NEXT: ; return to shader part epilog ; -; SDAG-FAKE16-LABEL: test_minmax_f16: -; SDAG-FAKE16: ; %bb.0: -; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 -; SDAG-FAKE16-NEXT: ; return to shader part epilog +; FAKE16-LABEL: test_minmax_f16: +; FAKE16: ; %bb.0: +; FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; FAKE16-NEXT: ; return to shader part epilog ; -; GISEL-TRUE16-LABEL: test_minmax_f16: -; GISEL-TRUE16: ; %bb.0: -; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l -; GISEL-TRUE16-NEXT: ; return to shader part epilog +; GFX12-SDAG-TRUE16-LABEL: test_minmax_f16: +; GFX12-SDAG-TRUE16: ; %bb.0: +; GFX12-SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog ; -; GISEL-FAKE16-LABEL: test_minmax_f16: -; GISEL-FAKE16: ; %bb.0: -; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 -; GISEL-FAKE16-NEXT: ; return to shader part epilog +; GFX12-SDAG-FAKE16-LABEL: test_minmax_f16: +; GFX12-SDAG-FAKE16: ; %bb.0: +; GFX12-SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-TRUE16-LABEL: test_minmax_f16: +; GFX12-GISEL-TRUE16: ; %bb.0: +; GFX12-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l +; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-FAKE16-LABEL: test_minmax_f16: +; GFX12-GISEL-FAKE16: ; %bb.0: +; GFX12-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %max = call half @llvm.maximum.f16(half %a, half %b) %minmax = call half @llvm.minimum.f16(half %max, half %c) ret half %minmax } define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) { -; SDAG-TRUE16-LABEL: test_minmax_commuted_f16: -; SDAG-TRUE16: ; %bb.0: -; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l -; SDAG-TRUE16-NEXT: ; return to shader part epilog +; TRUE16-LABEL: test_minmax_commuted_f16: +; TRUE16: ; %bb.0: +; TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l +; TRUE16-NEXT: ; return to shader part epilog ; -; SDAG-FAKE16-LABEL: test_minmax_commuted_f16: -; SDAG-FAKE16: ; %bb.0: -; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 -; SDAG-FAKE16-NEXT: ; return to shader part epilog +; FAKE16-LABEL: test_minmax_commuted_f16: +; FAKE16: ; %bb.0: +; FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; FAKE16-NEXT: ; return to shader part epilog ; -; GISEL-TRUE16-LABEL: test_minmax_commuted_f16: -; GISEL-TRUE16: ; %bb.0: -; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l -; GISEL-TRUE16-NEXT: ; return to shader part epilog +; GFX12-SDAG-TRUE16-LABEL: test_minmax_commuted_f16: +; GFX12-SDAG-TRUE16: ; %bb.0: +; GFX12-SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog ; -; GISEL-FAKE16-LABEL: test_minmax_commuted_f16: -; GISEL-FAKE16: ; %bb.0: -; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 -; GISEL-FAKE16-NEXT: ; return to shader part epilog +; GFX12-SDAG-FAKE16-LABEL: test_minmax_commuted_f16: +; GFX12-SDAG-FAKE16: ; %bb.0: +; GFX12-SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-TRUE16-LABEL: test_minmax_commuted_f16: +; GFX12-GISEL-TRUE16: ; %bb.0: +; GFX12-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l +; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-FAKE16-LABEL: test_minmax_commuted_f16: +; GFX12-GISEL-FAKE16: ; %bb.0: +; GFX12-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %max = call half @llvm.maximum.f16(half %a, half %b) %minmax = call half @llvm.minimum.f16(half %c, half %max) ret half %minmax } define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) { -; SDAG-TRUE16-LABEL: test_maxmin_commuted_f16: -; SDAG-TRUE16: ; %bb.0: -; SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l -; SDAG-TRUE16-NEXT: ; return to shader part epilog +; TRUE16-LABEL: test_maxmin_commuted_f16: +; TRUE16: ; %bb.0: +; TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l +; TRUE16-NEXT: ; return to shader part epilog ; -; SDAG-FAKE16-LABEL: test_maxmin_commuted_f16: -; SDAG-FAKE16: ; %bb.0: -; SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2 -; SDAG-FAKE16-NEXT: ; return to shader part epilog +; FAKE16-LABEL: test_maxmin_commuted_f16: +; FAKE16: ; %bb.0: +; FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2 +; FAKE16-NEXT: ; return to shader part epilog ; -; GISEL-TRUE16-LABEL: test_maxmin_commuted_f16: -; GISEL-TRUE16: ; %bb.0: -; GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l -; GISEL-TRUE16-NEXT: ; return to shader part epilog +; GFX12-SDAG-TRUE16-LABEL: test_maxmin_commuted_f16: +; GFX12-SDAG-TRUE16: ; %bb.0: +; GFX12-SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog ; -; GISEL-FAKE16-LABEL: test_maxmin_commuted_f16: -; GISEL-FAKE16: ; %bb.0: -; GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2 -; GISEL-FAKE16-NEXT: ; return to shader part epilog +; GFX12-SDAG-FAKE16-LABEL: test_maxmin_commuted_f16: +; GFX12-SDAG-FAKE16: ; %bb.0: +; GFX12-SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2 +; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-TRUE16-LABEL: test_maxmin_commuted_f16: +; GFX12-GISEL-TRUE16: ; %bb.0: +; GFX12-GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l +; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-FAKE16-LABEL: test_maxmin_commuted_f16: +; GFX12-GISEL-FAKE16: ; %bb.0: +; GFX12-GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2 +; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %min = call half @llvm.minimum.f16(half %a, half %b) %maxmin = call half @llvm.maximum.f16(half %c, half %min) ret half %maxmin } define amdgpu_ps void @s_test_minmax_f16(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) { -; SDAG-TRUE16-LABEL: s_test_minmax_f16: -; SDAG-TRUE16: ; %bb.0: -; SDAG-TRUE16-NEXT: s_maximum_f16 s0, s0, s1 -; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-TRUE16-NEXT: s_mov_b32 s5, s4 -; SDAG-TRUE16-NEXT: s_mov_b32 s4, s3 -; SDAG-TRUE16-NEXT: s_minimum_f16 s0, s0, s2 -; SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) -; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 -; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5] -; SDAG-TRUE16-NEXT: s_endpgm +; GFX1170-SDAG-TRUE16-LABEL: s_test_minmax_f16: +; GFX1170-SDAG-TRUE16: ; %bb.0: +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1 +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-SDAG-TRUE16-NEXT: s_mov_b32 s5, s4 +; GFX1170-SDAG-TRUE16-NEXT: s_mov_b32 s4, s3 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, s2 +; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5] +; GFX1170-SDAG-TRUE16-NEXT: s_endpgm ; -; SDAG-FAKE16-LABEL: s_test_minmax_f16: -; SDAG-FAKE16: ; %bb.0: -; SDAG-FAKE16-NEXT: s_maximum_f16 s0, s0, s1 -; SDAG-FAKE16-NEXT: s_mov_b32 s5, s4 -; SDAG-FAKE16-NEXT: s_mov_b32 s4, s3 -; SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) -; SDAG-FAKE16-NEXT: s_minimum_f16 s0, s0, s2 -; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 -; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[4:5] -; SDAG-FAKE16-NEXT: s_endpgm +; GFX1170-SDAG-FAKE16-LABEL: s_test_minmax_f16: +; GFX1170-SDAG-FAKE16: ; %bb.0: +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, s0, s1 +; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s5, s4 +; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s4, s3 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, s2 +; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5] +; GFX1170-SDAG-FAKE16-NEXT: s_endpgm ; -; GISEL-TRUE16-LABEL: s_test_minmax_f16: -; GISEL-TRUE16: ; %bb.0: -; GISEL-TRUE16-NEXT: s_maximum_f16 s0, s0, s1 -; GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-TRUE16-NEXT: s_mov_b32 s6, s3 -; GISEL-TRUE16-NEXT: s_mov_b32 s7, s4 -; GISEL-TRUE16-NEXT: s_minimum_f16 s0, s0, s2 -; GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) -; GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 -; GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7] -; GISEL-TRUE16-NEXT: s_endpgm +; GFX1170-GISEL-TRUE16-LABEL: s_test_minmax_f16: +; GFX1170-GISEL-TRUE16: ; %bb.0: +; GFX1170-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 +; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1170-GISEL-TRUE16-NEXT: s_mov_b32 s6, s3 +; GFX1170-GISEL-TRUE16-NEXT: s_mov_b32 s7, s4 +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, s0, s1, v0.l +; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7] +; GFX1170-GISEL-TRUE16-NEXT: s_endpgm ; -; GISEL-FAKE16-LABEL: s_test_minmax_f16: -; GISEL-FAKE16: ; %bb.0: -; GISEL-FAKE16-NEXT: s_maximum_f16 s0, s0, s1 -; GISEL-FAKE16-NEXT: s_mov_b32 s6, s3 -; GISEL-FAKE16-NEXT: s_mov_b32 s7, s4 -; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-FAKE16-NEXT: s_minimum_f16 s0, s0, s2 -; GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) -; GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0 -; GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7] -; GISEL-FAKE16-NEXT: s_endpgm +; GFX1170-GISEL-FAKE16-LABEL: s_test_minmax_f16: +; GFX1170-GISEL-FAKE16: ; %bb.0: +; GFX1170-GISEL-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 +; GFX1170-GISEL-FAKE16-NEXT: s_mov_b32 s6, s3 +; GFX1170-GISEL-FAKE16-NEXT: s_mov_b32 s7, s4 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, s0, s1, v0 +; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7] +; GFX1170-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX12-SDAG-TRUE16-LABEL: s_test_minmax_f16: +; GFX12-SDAG-TRUE16: ; %bb.0: +; GFX12-SDAG-TRUE16-NEXT: s_maximum_f16 s0, s0, s1 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-SDAG-TRUE16-NEXT: s_mov_b32 s5, s4 +; GFX12-SDAG-TRUE16-NEXT: s_mov_b32 s4, s3 +; GFX12-SDAG-TRUE16-NEXT: s_minimum_f16 s0, s0, s2 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 +; GFX12-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5] +; GFX12-SDAG-TRUE16-NEXT: s_endpgm +; +; GFX12-SDAG-FAKE16-LABEL: s_test_minmax_f16: +; GFX12-SDAG-FAKE16: ; %bb.0: +; GFX12-SDAG-FAKE16-NEXT: s_maximum_f16 s0, s0, s1 +; GFX12-SDAG-FAKE16-NEXT: s_mov_b32 s5, s4 +; GFX12-SDAG-FAKE16-NEXT: s_mov_b32 s4, s3 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX12-SDAG-FAKE16-NEXT: s_minimum_f16 s0, s0, s2 +; GFX12-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[4:5] +; GFX12-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX12-GISEL-TRUE16-LABEL: s_test_minmax_f16: +; GFX12-GISEL-TRUE16: ; %bb.0: +; GFX12-GISEL-TRUE16-NEXT: s_maximum_f16 s0, s0, s1 +; GFX12-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-GISEL-TRUE16-NEXT: s_mov_b32 s6, s3 +; GFX12-GISEL-TRUE16-NEXT: s_mov_b32 s7, s4 +; GFX12-GISEL-TRUE16-NEXT: s_minimum_f16 s0, s0, s2 +; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 +; GFX12-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7] +; GFX12-GISEL-TRUE16-NEXT: s_endpgm +; +; GFX12-GISEL-FAKE16-LABEL: s_test_minmax_f16: +; GFX12-GISEL-FAKE16: ; %bb.0: +; GFX12-GISEL-FAKE16-NEXT: s_maximum_f16 s0, s0, s1 +; GFX12-GISEL-FAKE16-NEXT: s_mov_b32 s6, s3 +; GFX12-GISEL-FAKE16-NEXT: s_mov_b32 s7, s4 +; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-GISEL-FAKE16-NEXT: s_minimum_f16 s0, s0, s2 +; GFX12-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7] +; GFX12-GISEL-FAKE16-NEXT: s_endpgm %smax = call half @llvm.maximum.f16(half %a, half %b) %sminmax = call half @llvm.minimum.f16(half %smax, half %c) store half %sminmax, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll index 8c98931b0293..87f76bab79ed 100644 --- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll +++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll @@ -20,6 +20,12 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-SDAG,GFX1170-TRUE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-GISEL,GFX1170-TRUE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-SDAG,GFX1170-FAKE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-GISEL,GFX1170-FAKE16-GISEL %s + ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s @@ -129,6 +135,42 @@ define half @v_minimumnum_f16(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -233,6 +275,18 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) { ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimumnum_f16_nnan: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimumnum_f16_nnan: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimumnum_f16_nnan: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -302,6 +356,22 @@ define half @v_minimumnum_f16_1.0(half %x) { ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, 1.0, v0 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimumnum_f16_1.0: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, 1.0, v0.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimumnum_f16_1.0: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, 1.0, v0 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimumnum_f16_1.0: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -410,6 +480,22 @@ define float @v_minimumnum_f32(float %x, float %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -468,6 +554,12 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -564,6 +656,24 @@ define double @v_minimumnum_f64(double %x, double %y) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -624,6 +734,12 @@ define double @v_minimumnum_f64_nnan(double %x, double %y) { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -674,6 +790,14 @@ define float @v_minimumnum_f32_1.0(float %x) { ; GFX11-NEXT: v_min_f32_e32 v0, 1.0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f32_1.0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e32 v0, 1.0, v0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f32_1.0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -730,6 +854,14 @@ define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f32_rhs_not_snan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f32_rhs_not_snan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -787,6 +919,14 @@ define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f32_lhs_not_snan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f32_lhs_not_snan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -844,6 +984,14 @@ define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f32_both_operands_not_snan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f32_both_operands_not_snan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -898,6 +1046,14 @@ define double @v_minimumnum_f64_1.0(double %x) { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f64_1.0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], 1.0 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f64_1.0: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1032,6 +1188,42 @@ define half @v_minimumnum_f16_v_s(half %x, half inreg %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1206,6 +1398,42 @@ define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1 +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0 +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1 +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v1, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1360,6 +1588,24 @@ define float @v_minimumnum_f32_s_v(float inreg %x, float %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v1, v0 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_s_v: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1488,6 +1734,24 @@ define float @v_minimumnum_f32_v_s(float %x, float inreg %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_v_s: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1616,6 +1880,24 @@ define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_s_s: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1 +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_s_s: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1727,6 +2009,15 @@ define double @v_minimumnum_f64_s_v(double inreg %x, double %y) { ; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f64_s_v: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1] +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f64_s_v: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1825,6 +2116,15 @@ define double @v_minimumnum_f64_v_s(double %x, double inreg %y) { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f64_v_s: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1] +; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f64_v_s: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1940,6 +2240,24 @@ define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], s[2:3], s[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], s[0:1], s[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], s[2:3], s[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f64_s_s: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2052,6 +2370,24 @@ define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2165,6 +2501,24 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2279,6 +2633,24 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2393,6 +2765,24 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2527,6 +2917,42 @@ define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2686,6 +3112,42 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2846,6 +3308,42 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3006,6 +3504,42 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) { ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3146,6 +3680,24 @@ define double @v_minimumnum_f64_fneg(double %x, double %y) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f64_fneg: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3304,6 +3856,24 @@ define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v2f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v2f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v2f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3402,6 +3972,12 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v2f16_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v2f16_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3568,6 +4144,30 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v3f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v3f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v3f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3692,6 +4292,13 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v3f16_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v3f16_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3895,6 +4502,30 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v4f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v4f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v4f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4032,6 +4663,13 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v4f16_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v4f16_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4297,6 +4935,36 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v5 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v6f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v4 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v5 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v6f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v4 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v5 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v6f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -4651,6 +5319,42 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v7 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v8f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v8f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v8f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -5257,6 +5961,64 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v12 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v16f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v8 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v9 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v10 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v11 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v12 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v16f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v11, v11 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v10 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v12, v12 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v13, v13 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v14, v14 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v15, v15 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v8 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v9 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v10 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v11 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v12 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v16f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6442,6 +7204,118 @@ define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) { ; GFX11-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v32f16: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v16 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v17 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v18 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v19 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v20 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v21 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v22 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v23 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v8, v8, v24 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v9, v9, v25 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v10, v10, v26 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v11, v11, v27 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v12, v12, v28 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v13, v13, v29 +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v14, v14, v30 +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v15, v15, v16 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v32f16: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v16 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v17 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v18 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v19 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v20 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v21 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v22 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v23 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v8, v8, v24 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v9, v9, v25 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v10, v10, v26 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v11, v11, v27 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v12, v12, v28 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v13, v13, v29 +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v14, v14, v30 +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v15, v15, v16 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v32f16: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6672,6 +7546,24 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v2f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v2f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v2f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6736,6 +7628,12 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { ; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v2f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v2f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6884,6 +7782,28 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v3f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v2, v2, v5 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v3f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v2, v2, v5 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v3f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -6957,6 +7877,13 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { ; GFX11-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v3f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 +; GFX1170-NEXT: v_min_num_f32_e32 v2, v2, v5 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v3f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7132,6 +8059,30 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) { ; GFX11-GISEL-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v4f32: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 +; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v4f32: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v4f32: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7211,6 +8162,13 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { ; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v4f32_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 +; GFX1170-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v4f32_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7360,6 +8318,30 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) { ; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v2f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v2f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v2f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7431,6 +8413,13 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { ; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v2f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_min_num_f64 v[2:3], v[2:3], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v2f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7616,6 +8605,36 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) { ; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v3f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v3f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v3f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7698,6 +8717,14 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { ; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v3f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_min_num_f64 v[2:3], v[2:3], v[8:9] +; GFX1170-NEXT: v_min_num_f64 v[4:5], v[4:5], v[10:11] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v3f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -7920,6 +8947,42 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) { ; GFX11-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v4f64: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v4f64: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v4f64: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8013,6 +9076,15 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { ; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v4f64_nnan: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_min_num_f64 v[2:3], v[2:3], v[10:11] +; GFX1170-NEXT: v_min_num_f64 v[4:5], v[4:5], v[12:13] +; GFX1170-NEXT: v_min_num_f64 v[6:7], v[6:7], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v4f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8078,6 +9150,42 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 { ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX1170-TRUE16-SDAG: ; %bb.0: +; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX1170-TRUE16-GISEL: ; %bb.0: +; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX1170-FAKE16-SDAG: ; %bb.0: +; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX1170-FAKE16-GISEL: ; %bb.0: +; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: ; GFX12-TRUE16-SDAG: ; %bb.0: ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8182,6 +9290,18 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 { ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_minimumnum_f16_nan_no_ieee: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8236,6 +9356,22 @@ define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f32_no_ieee: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8294,6 +9430,12 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 { ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f32_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f32_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8338,6 +9480,24 @@ define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_f64_no_ieee: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8398,6 +9558,12 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 { ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_f64_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_f64_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8480,6 +9646,24 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX1170-SDAG: ; %bb.0: +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX1170-GISEL: ; %bb.0: +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8578,6 +9762,12 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v2f16_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8680,6 +9870,13 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v3f16_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -8796,6 +9993,13 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_minimumnum_v4f16_nnan_no_ieee: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll index 456db08bda06..95592ed50387 100644 --- a/llvm/test/CodeGen/AMDGPU/minmax.ll +++ b/llvm/test/CodeGen/AMDGPU/minmax.ll @@ -3,6 +3,10 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11,SDAG-GFX11-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-TRUE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,SDAG,SDAG-GFX1170,SDAG-GFX1170-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,SDAG,SDAG-GFX1170,SDAG-GFX1170-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,GISEL,GISEL-GFX1170,GISEL-GFX1170-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,GISEL,GISEL-GFX1170,GISEL-GFX1170-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12,GISEL-GFX12-TRUE16 %s @@ -19,6 +23,12 @@ define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_minmax_i32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_i32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_minmax_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -61,6 +71,26 @@ define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg % ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7] ; GISEL-GFX11-NEXT: s_endpgm ; +; SDAG-GFX1170-LABEL: s_test_minmax_i32: +; SDAG-GFX1170: ; %bb.0: +; SDAG-GFX1170-NEXT: s_max_i32 s0, s0, s1 +; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4 +; SDAG-GFX1170-NEXT: s_min_i32 s0, s0, s2 +; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3 +; SDAG-GFX1170-NEXT: global_store_b32 v0, v1, s[4:5] +; SDAG-GFX1170-NEXT: s_endpgm +; +; GISEL-GFX1170-LABEL: s_test_minmax_i32: +; GISEL-GFX1170: ; %bb.0: +; GISEL-GFX1170-NEXT: s_max_i32 s0, s0, s1 +; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1170-NEXT: s_min_i32 s0, s0, s2 +; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4 +; GISEL-GFX1170-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 +; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7] +; GISEL-GFX1170-NEXT: s_endpgm +; ; SDAG-GFX12-LABEL: s_test_minmax_i32: ; SDAG-GFX12: ; %bb.0: ; SDAG-GFX12-NEXT: s_max_i32 s0, s0, s1 @@ -115,6 +145,12 @@ define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_minmax_commuted_i32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_i32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_minmax_commuted_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -143,6 +179,12 @@ define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_maxmin_i32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minmax_i32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_maxmin_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -171,6 +213,12 @@ define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_maxmin_commuted_i32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minmax_i32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_maxmin_commuted_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -200,6 +248,13 @@ define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) { ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_smed3_i32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_i32 v2, v2, v3, v4 +; GFX1170-NEXT: global_store_b32 v[0:1], v2, off +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_smed3_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -233,6 +288,12 @@ define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_minmax_u32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_u32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_minmax_u32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -275,6 +336,26 @@ define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg % ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7] ; GISEL-GFX11-NEXT: s_endpgm ; +; SDAG-GFX1170-LABEL: s_test_minmax_u32: +; SDAG-GFX1170: ; %bb.0: +; SDAG-GFX1170-NEXT: s_max_u32 s0, s0, s1 +; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4 +; SDAG-GFX1170-NEXT: s_min_u32 s0, s0, s2 +; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3 +; SDAG-GFX1170-NEXT: global_store_b32 v0, v1, s[4:5] +; SDAG-GFX1170-NEXT: s_endpgm +; +; GISEL-GFX1170-LABEL: s_test_minmax_u32: +; GISEL-GFX1170: ; %bb.0: +; GISEL-GFX1170-NEXT: s_max_u32 s0, s0, s1 +; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1170-NEXT: s_min_u32 s0, s0, s2 +; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4 +; GISEL-GFX1170-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 +; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7] +; GISEL-GFX1170-NEXT: s_endpgm +; ; SDAG-GFX12-LABEL: s_test_minmax_u32: ; SDAG-GFX12: ; %bb.0: ; SDAG-GFX12-NEXT: s_max_u32 s0, s0, s1 @@ -329,6 +410,12 @@ define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_minmax_commuted_u32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maxmin_u32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_minmax_commuted_u32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -357,6 +444,12 @@ define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_maxmin_u32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minmax_u32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_maxmin_u32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -385,6 +478,12 @@ define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) { ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_maxmin_commuted_u32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minmax_u32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_maxmin_commuted_u32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -414,6 +513,13 @@ define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) { ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_umed3_i32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_u32 v2, v2, v3, v4 +; GFX1170-NEXT: global_store_b32 v[0:1], v2, off +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_umed3_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -457,6 +563,22 @@ define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) { ; GISEL-GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX1170-LABEL: test_minmax_f32_ieee_true: +; SDAG-GFX1170: ; %bb.0: +; SDAG-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; SDAG-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2 +; SDAG-GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2 +; SDAG-GFX1170-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-LABEL: test_minmax_f32_ieee_true: +; GISEL-GFX1170: ; %bb.0: +; GISEL-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GISEL-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2 +; GISEL-GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2 +; GISEL-GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-GFX12-LABEL: test_minmax_f32_ieee_true: ; SDAG-GFX12: ; %bb.0: ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -522,6 +644,26 @@ define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7] ; GISEL-GFX11-NEXT: s_endpgm ; +; SDAG-GFX1170-LABEL: s_test_minmax_f32_ieee_false: +; SDAG-GFX1170: ; %bb.0: +; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 +; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4 +; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3 +; SDAG-GFX1170-NEXT: v_maxmin_num_f32 v0, s0, s1, v0 +; SDAG-GFX1170-NEXT: global_store_b32 v1, v0, s[4:5] +; SDAG-GFX1170-NEXT: s_endpgm +; +; GISEL-GFX1170-LABEL: s_test_minmax_f32_ieee_false: +; GISEL-GFX1170: ; %bb.0: +; GISEL-GFX1170-NEXT: s_max_f32 s0, s0, s1 +; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4 +; GISEL-GFX1170-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX1170-NEXT: s_min_f32 s0, s0, s2 +; GISEL-GFX1170-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7] +; GISEL-GFX1170-NEXT: s_endpgm +; ; SDAG-GFX12-LABEL: s_test_minmax_f32_ieee_false: ; SDAG-GFX12: ; %bb.0: ; SDAG-GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 @@ -575,6 +717,11 @@ define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, ; GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2 ; GFX11-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_minmax_commuted_f32_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_minmax_commuted_f32_ieee_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_maxmin_num_f32 v0, v0, v1, v2 @@ -607,6 +754,22 @@ define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) { ; GISEL-GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2 ; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX1170-LABEL: test_maxmin_f32_ieee_true: +; SDAG-GFX1170: ; %bb.0: +; SDAG-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; SDAG-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2 +; SDAG-GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2 +; SDAG-GFX1170-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-LABEL: test_maxmin_f32_ieee_true: +; GISEL-GFX1170: ; %bb.0: +; GISEL-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GISEL-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2 +; GISEL-GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2 +; GISEL-GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-GFX12-LABEL: test_maxmin_f32_ieee_true: ; SDAG-GFX12: ; %bb.0: ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -659,6 +822,11 @@ define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, ; GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2 ; GFX11-NEXT: ; return to shader part epilog ; +; GFX1170-LABEL: test_maxmin_commuted_f32_ieee_false: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_maxmin_commuted_f32_ieee_false: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_minmax_num_f32 v0, v0, v1, v2 @@ -682,6 +850,13 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_med3_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v2, v2, v3, v4 +; GFX1170-NEXT: global_store_b32 v[0:1], v2, off +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_med3_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -716,6 +891,13 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_med3_minimumnum_maximumnum_f32: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_med3_num_f32 v2, v2, v3, v4 +; GFX1170-NEXT: global_store_b32 v[0:1], v2, off +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_med3_minimumnum_maximumnum_f32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -763,6 +945,26 @@ define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) { ; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2 ; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog ; +; SDAG-GFX1170-TRUE16-LABEL: test_minmax_f16_ieee_false: +; SDAG-GFX1170-TRUE16: ; %bb.0: +; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l +; SDAG-GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; SDAG-GFX1170-FAKE16-LABEL: test_minmax_f16_ieee_false: +; SDAG-GFX1170-FAKE16: ; %bb.0: +; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2 +; SDAG-GFX1170-FAKE16-NEXT: ; return to shader part epilog +; +; GISEL-GFX1170-TRUE16-LABEL: test_minmax_f16_ieee_false: +; GISEL-GFX1170-TRUE16: ; %bb.0: +; GISEL-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l +; GISEL-GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; GISEL-GFX1170-FAKE16-LABEL: test_minmax_f16_ieee_false: +; GISEL-GFX1170-FAKE16: ; %bb.0: +; GISEL-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2 +; GISEL-GFX1170-FAKE16-NEXT: ; return to shader part epilog +; ; SDAG-GFX12-TRUE16-LABEL: test_minmax_f16_ieee_false: ; SDAG-GFX12-TRUE16: ; %bb.0: ; SDAG-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l @@ -850,6 +1052,47 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b ; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7] ; GISEL-GFX11-FAKE16-NEXT: s_endpgm ; +; SDAG-GFX1170-TRUE16-LABEL: s_test_minmax_f16_ieee_false: +; SDAG-GFX1170-TRUE16: ; %bb.0: +; SDAG-GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 +; SDAG-GFX1170-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-GFX1170-TRUE16-NEXT: s_mov_b32 s5, s4 +; SDAG-GFX1170-TRUE16-NEXT: s_mov_b32 s4, s3 +; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l +; SDAG-GFX1170-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5] +; SDAG-GFX1170-TRUE16-NEXT: s_endpgm +; +; SDAG-GFX1170-FAKE16-LABEL: s_test_minmax_f16_ieee_false: +; SDAG-GFX1170-FAKE16: ; %bb.0: +; SDAG-GFX1170-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 +; SDAG-GFX1170-FAKE16-NEXT: s_mov_b32 s5, s4 +; SDAG-GFX1170-FAKE16-NEXT: s_mov_b32 s4, s3 +; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, s0, s1, v0 +; SDAG-GFX1170-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5] +; SDAG-GFX1170-FAKE16-NEXT: s_endpgm +; +; GISEL-GFX1170-TRUE16-LABEL: s_test_minmax_f16_ieee_false: +; GISEL-GFX1170-TRUE16: ; %bb.0: +; GISEL-GFX1170-TRUE16-NEXT: s_max_f16 s0, s0, s1 +; GISEL-GFX1170-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX1170-TRUE16-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1170-TRUE16-NEXT: s_mov_b32 s7, s4 +; GISEL-GFX1170-TRUE16-NEXT: s_min_f16 s0, s0, s2 +; GISEL-GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 +; GISEL-GFX1170-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7] +; GISEL-GFX1170-TRUE16-NEXT: s_endpgm +; +; GISEL-GFX1170-FAKE16-LABEL: s_test_minmax_f16_ieee_false: +; GISEL-GFX1170-FAKE16: ; %bb.0: +; GISEL-GFX1170-FAKE16-NEXT: s_max_f16 s0, s0, s1 +; GISEL-GFX1170-FAKE16-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1170-FAKE16-NEXT: s_mov_b32 s7, s4 +; GISEL-GFX1170-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX1170-FAKE16-NEXT: s_min_f16 s0, s0, s2 +; GISEL-GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX1170-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7] +; GISEL-GFX1170-FAKE16-NEXT: s_endpgm +; ; SDAG-GFX12-TRUE16-LABEL: s_test_minmax_f16_ieee_false: ; SDAG-GFX12-TRUE16: ; %bb.0: ; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 @@ -978,6 +1221,42 @@ define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) { ; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2 ; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX1170-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true: +; SDAG-GFX1170-TRUE16: ; %bb.0: +; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l +; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l +; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX1170-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true: +; SDAG-GFX1170-FAKE16: ; %bb.0: +; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2 +; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true: +; GISEL-GFX1170-TRUE16: ; %bb.0: +; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l +; GISEL-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l +; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true: +; GISEL-GFX1170-FAKE16: ; %bb.0: +; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GISEL-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2 +; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-GFX12-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true: ; SDAG-GFX12-TRUE16: ; %bb.0: ; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1095,6 +1374,26 @@ define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) { ; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2 ; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog ; +; SDAG-GFX1170-TRUE16-LABEL: test_maxmin_f16_ieee_false: +; SDAG-GFX1170-TRUE16: ; %bb.0: +; SDAG-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l +; SDAG-GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; SDAG-GFX1170-FAKE16-LABEL: test_maxmin_f16_ieee_false: +; SDAG-GFX1170-FAKE16: ; %bb.0: +; SDAG-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2 +; SDAG-GFX1170-FAKE16-NEXT: ; return to shader part epilog +; +; GISEL-GFX1170-TRUE16-LABEL: test_maxmin_f16_ieee_false: +; GISEL-GFX1170-TRUE16: ; %bb.0: +; GISEL-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l +; GISEL-GFX1170-TRUE16-NEXT: ; return to shader part epilog +; +; GISEL-GFX1170-FAKE16-LABEL: test_maxmin_f16_ieee_false: +; GISEL-GFX1170-FAKE16: ; %bb.0: +; GISEL-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2 +; GISEL-GFX1170-FAKE16-NEXT: ; return to shader part epilog +; ; SDAG-GFX12-TRUE16-LABEL: test_maxmin_f16_ieee_false: ; SDAG-GFX12-TRUE16: ; %bb.0: ; SDAG-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l @@ -1180,6 +1479,42 @@ define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) { ; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2 ; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX1170-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true: +; SDAG-GFX1170-TRUE16: ; %bb.0: +; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l +; SDAG-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l +; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX1170-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true: +; SDAG-GFX1170-FAKE16: ; %bb.0: +; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; SDAG-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2 +; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true: +; GISEL-GFX1170-TRUE16: ; %bb.0: +; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l +; GISEL-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l +; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true: +; GISEL-GFX1170-FAKE16: ; %bb.0: +; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GISEL-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2 +; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-GFX12-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true: ; SDAG-GFX12-TRUE16: ; %bb.0: ; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1305,6 +1640,34 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 ; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v2, off ; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX1170-TRUE16-LABEL: test_med3_f16: +; SDAG-GFX1170-TRUE16: ; %bb.0: +; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l +; SDAG-GFX1170-TRUE16-NEXT: global_store_b16 v[0:1], v2, off +; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX1170-FAKE16-LABEL: test_med3_f16: +; SDAG-GFX1170-FAKE16: ; %bb.0: +; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX1170-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4 +; SDAG-GFX1170-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-TRUE16-LABEL: test_med3_f16: +; GISEL-GFX1170-TRUE16: ; %bb.0: +; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l +; GISEL-GFX1170-TRUE16-NEXT: global_store_b16 v[0:1], v2, off +; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX1170-FAKE16-LABEL: test_med3_f16: +; GISEL-GFX1170-FAKE16: ; %bb.0: +; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX1170-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4 +; GISEL-GFX1170-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-GFX12-TRUE16-LABEL: test_med3_f16: ; SDAG-GFX12-TRUE16: ; %bb.0: ; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll index 59c0f1cc7782..344cf2dc77b6 100644 --- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s @@ -23,6 +25,13 @@ define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -53,6 +62,13 @@ define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -83,6 +99,13 @@ define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -112,6 +135,12 @@ define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) { ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -140,6 +169,13 @@ define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -170,6 +206,13 @@ define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -200,6 +243,13 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -229,6 +279,12 @@ define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) { ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -260,6 +316,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -296,6 +361,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -332,6 +406,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x f ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -366,6 +449,12 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, < ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -397,6 +486,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -433,6 +531,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -469,6 +576,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x f ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -503,6 +619,12 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, < ; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -534,6 +656,20 @@ define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_safe: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -579,6 +715,20 @@ define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -624,6 +774,20 @@ define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -668,6 +832,18 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) { ; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -709,6 +885,20 @@ define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_safe: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -754,6 +944,20 @@ define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -799,6 +1003,20 @@ define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -843,6 +1061,18 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) { ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -899,6 +1129,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> % ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -971,6 +1224,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x ha ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1043,6 +1319,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x hal ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1107,6 +1406,12 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2 ; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1153,6 +1458,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> % ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1225,6 +1553,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x ha ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1297,6 +1648,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x hal ; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1361,6 +1735,12 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1427,6 +1807,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> % ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1531,6 +1947,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x ha ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1635,6 +2087,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x hal ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l +; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1724,6 +2212,13 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 ; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1791,6 +2286,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> % ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1895,6 +2426,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x ha ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1999,6 +2566,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x hal ; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX1170-TRUE16: ; %bb.0: +; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l +; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1 +; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2 +; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX1170-FAKE16: ; %bb.0: +; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: ; GFX12-TRUE16: ; %bb.0: ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2088,6 +2691,13 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2120,6 +2730,14 @@ define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2155,6 +2773,14 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float ; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll index f1d1ddf49bbf..6b2be6b05b12 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll @@ -11,6 +11,10 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s @@ -123,6 +127,44 @@ define half @test_vector_reduce_fmax_v2half(<2 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v2half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v2half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v2half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v2half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v2half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -307,6 +349,54 @@ define half @test_vector_reduce_fmax_v3half(<3 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v3half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7e00 +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v3half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0x7e00 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v3half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v3half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v3half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -535,6 +625,59 @@ define half @test_vector_reduce_fmax_v4half(<4 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v4half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v4half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v4half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v4half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v4half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -871,6 +1014,86 @@ define half @test_vector_reduce_fmax_v8half(<8 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v8half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v8half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v8half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v3.l, v3.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v3.h, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v8half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v6 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v8half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1438,6 +1661,138 @@ define half @test_vector_reduce_fmax_v16half(<16 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v16half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v1.l, v1.h +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v2.l, v2.h +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v3.l, v3.h +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v4.l, v4.h +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v5.l, v5.h +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v6.l, v6.h +; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v7.l, v7.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v16half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v8 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v9 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v2, v8 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v3, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v4, v2 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v5, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v7 +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v6, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v7, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v16half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v3.h, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v4.l, v4.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v4.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.l, v5.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.h, v5.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v6.l, v6.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v6.h, v6.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.h, v7.l, v7.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v6.l, v7.h, v7.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v2.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.h, v3.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v3.h, v4.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v5.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.h, v6.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v16half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v9, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v10, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v11, v11 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v12, v12 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v13, v13 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v11, v14, v14 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v12, v15, v15 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v11 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v12 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v4, v5 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v6, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v16half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1671,6 +2026,22 @@ define float @test_vector_reduce_fmax_v2float(<2 x float> %v) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v2float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v2float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v2float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1779,6 +2150,21 @@ define float @test_vector_reduce_fmax_v3float(<3 x float> %v) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v3float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v1, v2 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v3float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v3float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1910,6 +2296,25 @@ define float @test_vector_reduce_fmax_v4float(<4 x float> %v) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v4float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v4float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v4float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2093,6 +2498,33 @@ define float @test_vector_reduce_fmax_v8float(<8 x float> %v) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v8float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v4, v5 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v6, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v8float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v4, v5 :: v_dual_max_num_f32 v3, v6, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v8float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2382,6 +2814,51 @@ define float @test_vector_reduce_fmax_v16float(<16 x float> %v) { ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v16float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v4, v5 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v6, v7 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v8, v9 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v10, v11 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v12, v13 +; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v14, v15 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v16float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v6 :: v_dual_max_num_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v7, v7 :: v_dual_max_num_f32 v5, v8, v8 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v9, v9 :: v_dual_max_num_f32 v7, v10, v10 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v8, v11, v11 :: v_dual_max_num_f32 v9, v12, v12 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v10, v13, v13 :: v_dual_max_num_f32 v11, v14, v14 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v12, v15, v15 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v3, v4 :: v_dual_max_num_f32 v4, v5, v6 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v5, v7, v8 :: v_dual_max_num_f32 v6, v9, v10 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v7, v11, v12 :: v_dual_max_num_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v2, v3 :: v_dual_max_num_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v7 :: v_dual_max_num_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v1, v2, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v16float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2523,6 +3000,24 @@ define double @test_vector_reduce_fmax_v2double(<2 x double> %v) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v2double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v2double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v2double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2656,6 +3151,28 @@ define double @test_vector_reduce_fmax_v3double(<3 x double> %v) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v3double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v3double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v3double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2815,6 +3332,34 @@ define double @test_vector_reduce_fmax_v4double(<4 x double> %v) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v4double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v4double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v4double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3062,6 +3607,52 @@ define double @test_vector_reduce_fmax_v8double(<8 x double> %v) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v8double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v8double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v8double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3511,6 +4102,92 @@ define double @test_vector_reduce_fmax_v16double(<16 x double> %v) { ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v16double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[16:17], v[16:17] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[18:19], v[18:19] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[20:21], v[20:21] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[22:23], v[22:23] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[24:25], v[24:25] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[26:27], v[26:27] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[28:29], v[28:29] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[30:31], v[30:31] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v16double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[16:17], v[16:17], v[16:17] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[18:19], v[18:19], v[18:19] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[20:21], v[20:21], v[20:21] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[22:23], v[22:23], v[22:23] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[24:25], v[24:25], v[24:25] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[26:27], v[26:27], v[26:27] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[28:29], v[28:29], v[28:29] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[16:17], v[18:19] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[20:21], v[22:23] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[24:25], v[26:27] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[30:31], v[30:31], v[30:31] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[28:29], v[30:31] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v16double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3627,6 +4304,7 @@ declare double @llvm.vector.reduce.fmax.v16double(<16 x double>) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX10: {{.*}} ; GFX11: {{.*}} +; GFX1170: {{.*}} ; GFX12: {{.*}} ; GFX8: {{.*}} ; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll index a20b5de78627..e77b44d3ee3c 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll @@ -5,6 +5,8 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s @@ -69,6 +71,20 @@ define half @test_vector_reduce_fmaximum_v2half(<2 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v2half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -180,6 +196,27 @@ define half @test_vector_reduce_fmaximum_v3half(<3 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0xfc00 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v3half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s0, 0xfc00 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_perm_b32 v1, s0, v1, 0x5040100 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -326,6 +363,23 @@ define half @test_vector_reduce_fmaximum_v4half(<4 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v4half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -560,6 +614,28 @@ define half @test_vector_reduce_fmaximum_v8half(<8 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v8half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1002,6 +1078,38 @@ define half @test_vector_reduce_fmaximum_v16half(<16 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v3, v3, v7 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v2, v2, v6 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v16half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v3, v3, v7 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v5 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v2, v2, v6 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v4 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1092,6 +1200,12 @@ define float @test_vector_reduce_fmaximum_v2float(<2 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v2float: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v2float: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1169,6 +1283,12 @@ define float @test_vector_reduce_fmaximum_v3float(<3 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v3float: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, v2 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v3float: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1263,6 +1383,14 @@ define float @test_vector_reduce_fmaximum_v4float(<4 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v4float: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v4float: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1427,6 +1555,17 @@ define float @test_vector_reduce_fmaximum_v8float(<8 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v8float: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3 +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v4, v5 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v6, v7 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v8float: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1730,6 +1869,23 @@ define float @test_vector_reduce_fmaximum_v16float(<16 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v16float: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f32 v0, v0, v1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3 +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v4, v5 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v6, v7 +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v8, v9 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v10, v11 +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v12, v13 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum3_f32 v0, v0, v14, v15 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v16float: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1807,6 +1963,12 @@ define double @test_vector_reduce_fmaximum_v2double(<2 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v2double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v2double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1895,6 +2057,14 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v3double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v3double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2007,6 +2177,15 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v4double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v4double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2210,6 +2389,21 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v8double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[10:11] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[12:13] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[14:15] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v8double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2609,6 +2803,35 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fmaximum_v16double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[10:11] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[12:13] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[14:15] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[18:19] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[20:21] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[22:23] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[24:25] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[26:27] +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[28:29] +; GFX1170-NEXT: s_waitcnt vmcnt(0) +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[30:31] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fmaximum_v16double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2664,6 +2887,7 @@ declare double @llvm.vector.reduce.fmaximum.v16double(<16 x double>) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX10-SDAG: {{.*}} ; GFX11-SDAG: {{.*}} +; GFX1170-SDAG: {{.*}} ; GFX12-SDAG: {{.*}} ; GFX7-SDAG: {{.*}} ; GFX8-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll index 9b26912e659e..efb210d6f07f 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll @@ -11,6 +11,10 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s @@ -123,6 +127,44 @@ define half @test_vector_reduce_fmin_v2half(<2 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v2half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v2half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v2half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v2half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v2half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -307,6 +349,54 @@ define half @test_vector_reduce_fmin_v3half(<3 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v3half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7e00 +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v3half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0x7e00 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v3half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v3half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v3half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -535,6 +625,59 @@ define half @test_vector_reduce_fmin_v4half(<4 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v4half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v4half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v4half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v4half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v4half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -871,6 +1014,86 @@ define half @test_vector_reduce_fmin_v8half(<8 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v8half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v8half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v8half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v3.l, v3.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v3.h, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v8half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v2, v6 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v3, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v8half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1438,6 +1661,138 @@ define half @test_vector_reduce_fmin_v16half(<16 x half> %v) { ; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v16half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v1.l, v1.h +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v2.l, v2.h +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v3.l, v3.h +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v4.l, v4.h +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v5.l, v5.h +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v6.l, v6.h +; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v7.l, v7.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v16half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8 +; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v8 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v9 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v2, v8 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v3, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v4, v2 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v5, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v7 +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v6, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v7, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v16half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v3.h, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v4.l, v4.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v4.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.l, v5.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.h, v5.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v6.l, v6.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v6.h, v6.h +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.h, v7.l, v7.l +; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v6.l, v7.h, v7.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v1.h, v2.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v2.l, v2.h, v3.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v2.h, v3.h, v4.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v3.l, v4.h, v5.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v3.h, v5.h, v6.l +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v16half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v9, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v10, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v2, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v11, v11 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v12, v12 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v13, v13 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v11, v14, v14 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v12, v15, v15 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v3, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v4, v4, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v5, v5, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v6, v6, v11 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v7, v7, v12 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v4, v5 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v6, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v16half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1671,6 +2026,22 @@ define float @test_vector_reduce_fmin_v2float(<2 x float> %v) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v2float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v2float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v2float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1779,6 +2150,21 @@ define float @test_vector_reduce_fmin_v3float(<3 x float> %v) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v3float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v1, v2 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v3float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2 +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v3float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1910,6 +2296,25 @@ define float @test_vector_reduce_fmin_v4float(<4 x float> %v) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v4float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v4float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v4float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2093,6 +2498,33 @@ define float @test_vector_reduce_fmin_v8float(<8 x float> %v) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v8float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v4, v5 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v6, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v8float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v2, v4, v5 :: v_dual_min_num_f32 v3, v6, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v8float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2382,6 +2814,51 @@ define float @test_vector_reduce_fmin_v16float(<16 x float> %v) { ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v16float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v4, v5 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v6, v7 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v8, v9 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v10, v11 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v12, v13 +; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v14, v15 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v16float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v6 :: v_dual_min_num_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v7, v7 :: v_dual_max_num_f32 v5, v8, v8 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v9, v9 :: v_dual_max_num_f32 v7, v10, v10 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v8, v11, v11 :: v_dual_max_num_f32 v9, v12, v12 +; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v10, v13, v13 :: v_dual_max_num_f32 v11, v14, v14 +; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v12, v15, v15 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v3, v3, v4 :: v_dual_min_num_f32 v4, v5, v6 +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v5, v7, v8 :: v_dual_min_num_f32 v6, v9, v10 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v7, v11, v12 :: v_dual_min_num_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v1, v2, v3 :: v_dual_min_num_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v3, v6, v7 :: v_dual_min_num_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v1, v2, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v16float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2522,6 +2999,24 @@ define double @test_vector_reduce_fmin_v2double(<2 x double> %v) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v2double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v2double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v2double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2655,6 +3150,28 @@ define double @test_vector_reduce_fmin_v3double(<3 x double> %v) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v3double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v3double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v3double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2814,6 +3331,34 @@ define double @test_vector_reduce_fmin_v4double(<4 x double> %v) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v4double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v4double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v4double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3061,6 +3606,52 @@ define double @test_vector_reduce_fmin_v8double(<8 x double> %v) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v8double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v8double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v8double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3510,6 +4101,92 @@ define double @test_vector_reduce_fmin_v16double(<16 x double> %v) { ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v16double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[16:17], v[16:17] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[18:19], v[18:19] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[20:21], v[20:21] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[22:23], v[22:23] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[24:25], v[24:25] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[26:27], v[26:27] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[28:29], v[28:29] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[30:31], v[30:31] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v16double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[16:17], v[16:17], v[16:17] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[18:19], v[18:19], v[18:19] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[20:21], v[20:21], v[20:21] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[22:23], v[22:23], v[22:23] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[24:25], v[24:25], v[24:25] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[26:27], v[26:27], v[26:27] +; GFX1170-GISEL-NEXT: v_max_num_f64 v[28:29], v[28:29], v[28:29] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[8:9], v[16:17], v[18:19] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[10:11], v[20:21], v[22:23] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[12:13], v[24:25], v[26:27] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-NEXT: v_max_num_f64 v[30:31], v[30:31], v[30:31] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[14:15], v[28:29], v[30:31] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v16double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -3626,6 +4303,7 @@ declare double @llvm.vector.reduce.fmin.v16double(<16 x double>) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX10: {{.*}} ; GFX11: {{.*}} +; GFX1170: {{.*}} ; GFX12: {{.*}} ; GFX8: {{.*}} ; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll index deeac90a952c..f331a3336442 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll @@ -5,6 +5,10 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s @@ -71,6 +75,34 @@ define half @test_vector_reduce_fminimum_v2half(<2 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v2half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v2half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v2half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -204,6 +236,44 @@ define half @test_vector_reduce_fminimum_v3half(<3 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7c00 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v3half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x7c00 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_perm_b32 v1, s0, v1, 0x5040100 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v3half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v3half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -375,6 +445,44 @@ define half @test_vector_reduce_fminimum_v4half(<4 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v4half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v4half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v4half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -638,6 +746,62 @@ define half @test_vector_reduce_fminimum_v8half(<8 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v8half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v8half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v8half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v5 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v2, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v3, v7 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1122,6 +1286,90 @@ define half @test_vector_reduce_fminimum_v16half(<16 x half> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half: +; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v3, v3, v7 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v2, v2, v6 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v16half: +; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v3, v3, v7 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v5 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v2, v2, v6 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v4 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v16half: +; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v2.l, v4.l, v4.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v2.h, v5.l, v5.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v3.l, v6.l, v6.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v3.h, v7.l, v7.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h +; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h +; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v16half: +; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6 +; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v8 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v9 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v2, v10 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v3, v11 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v4, v4, v12 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v5, v5, v13 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v6, v6, v14 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v7, v7, v15 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v4, v5 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v6, v7 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3 +; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 +; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half: ; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1272,6 +1520,12 @@ define float @test_vector_reduce_fminimum_v2float(<2 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fminimum_v2float: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fminimum_v2float: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1349,6 +1603,20 @@ define float @test_vector_reduce_fminimum_v3float(<3 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v3float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v1, v2 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v3float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v2 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v3float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1455,6 +1723,23 @@ define float @test_vector_reduce_fminimum_v4float(<4 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v4float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v4float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1632,6 +1917,31 @@ define float @test_vector_reduce_fminimum_v8float(<8 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v8float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v4, v5 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v6, v7 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v8float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1953,6 +2263,45 @@ define float @test_vector_reduce_fminimum_v16float(<16 x float> %v) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v16float: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3 +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v4, v5 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v6, v7 +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v8, v9 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v10, v11 +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v12, v13 +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v14, v15 +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v16float: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7 +; GFX1170-GISEL-NEXT: v_minimum_f32 v4, v8, v9 +; GFX1170-GISEL-NEXT: v_minimum_f32 v5, v10, v11 +; GFX1170-GISEL-NEXT: v_minimum_f32 v6, v12, v13 +; GFX1170-GISEL-NEXT: v_minimum_f32 v7, v14, v15 +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7 +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3 +; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1 +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16float: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2056,6 +2405,12 @@ define double @test_vector_reduce_fminimum_v2double(<2 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fminimum_v2double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fminimum_v2double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2144,6 +2499,14 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-LABEL: test_vector_reduce_fminimum_v3double: +; GFX1170: ; %bb.0: ; %entry +; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-LABEL: test_vector_reduce_fminimum_v3double: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2256,6 +2619,24 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v4double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v4double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2472,6 +2853,35 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v8double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[10:11] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[12:13] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[14:15] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v8double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2889,6 +3299,60 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) { ; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v16double: +; GFX1170-SDAG: ; %bb.0: ; %entry +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[10:11] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[12:13] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[14:15] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[16:17] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[18:19] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[20:21] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[22:23] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[24:25] +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[26:27] +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[28:29] +; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[30:31] +; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v16double: +; GFX1170-GISEL: ; %bb.0: ; %entry +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[8:9], v[16:17], v[18:19] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[10:11], v[20:21], v[22:23] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[12:13], v[24:25], v[26:27] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[14:15], v[28:29], v[30:31] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15] +; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7] +; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16double: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0