[AMDGPU] Codegen for min/max instructions for gfx1170 (#185625)

gfx1170 does not have the s_minimum/maximum_f16/f32 instructions, so a new
feature, `SALUMinimumMaximumInsts`, is added and enabled for gfx12+ subtargets.
This commit is contained in:
Mirko Brkušanin 2026-03-12 12:32:56 +01:00 committed by GitHub
parent a372eca60d
commit efd20a3603
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 10350 additions and 315 deletions

View File

@ -186,6 +186,10 @@ defm IEEEMinimumMaximumInsts : AMDGPUSubtargetFeature<"ieee-minimum-maximum-inst
"v_pk_minimum/maximum_f16 instructions"
>;
defm SALUMinimumMaximumInsts : AMDGPUSubtargetFeature<"salu-minimum-maximum-insts",
"Has s_minimum/maximum_f16/f32 instructions"
>;
defm Minimum3Maximum3F32 : AMDGPUSubtargetFeature<"minimum3-maximum3-f32",
"Has v_minimum3_f32 and v_maximum3_f32 instructions"
>;
@ -1488,9 +1492,10 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
FeatureFlatOffsetBits24, FeatureFlatSignedOffset, FeatureInstCacheLineSize128
FeatureIEEEMinimumMaximumInsts, FeatureSALUMinimumMaximumInsts,
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics, FeatureFlatOffsetBits24,
FeatureFlatSignedOffset, FeatureInstCacheLineSize128
]
>;
@ -1513,9 +1518,10 @@ def FeatureGFX13 : GCNSubtargetFeatureGeneration<"GFX13",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
FeatureFlatOffsetBits24, FeatureFlatSignedOffset, FeatureInstCacheLineSize128
FeatureIEEEMinimumMaximumInsts, FeatureSALUMinimumMaximumInsts,
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics, FeatureFlatOffsetBits24,
FeatureFlatSignedOffset, FeatureInstCacheLineSize128
]
>;
//===----------------------------------------------------------------------===//

View File

@ -1351,10 +1351,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}})
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})

View File

@ -4075,8 +4075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_ROUNDEVEN:
case AMDGPU::G_FMINNUM:
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINIMUM:
case AMDGPU::G_FMAXIMUM:
case AMDGPU::G_FMINIMUMNUM:
case AMDGPU::G_FMAXIMUMNUM:
case AMDGPU::G_INTRINSIC_TRUNC:
@ -4091,6 +4089,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
// G_FMINIMUM/G_FMAXIMUM are only given the SOP (scalar ALU) mapping when the
// subtarget reports hasSALUMinimumMaximumInsts(), the result type is a 16- or
// 32-bit scalar, and isSALUMapping(MI) holds. Every other case (vectors,
// other sizes, subtargets without the feature) falls back to the VOP mapping.
case AMDGPU::G_FMINIMUM:
case AMDGPU::G_FMAXIMUM: {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = Ty.getSizeInBits();
if (Subtarget.hasSALUMinimumMaximumInsts() && Ty.isScalar() &&
(Size == 32 || Size == 16) && isSALUMapping(MI))
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
case AMDGPU::G_FPTOSI:
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FPTOSI_SAT:

View File

@ -2034,9 +2034,9 @@ class ClampPat<Instruction inst, ValueType vt> : GCNPat <
>;
def : ClampPat<V_MAX_F32_e64, f32>;
let SubtargetPredicate = isNotGFX12Plus in
let SubtargetPredicate = NotHasIEEEMinimumMaximumInsts in
def : ClampPat<V_MAX_F64_e64, f64>;
let SubtargetPredicate = isGFX12Plus in
let SubtargetPredicate = HasIEEEMinimumMaximumInsts in
def : ClampPat<V_MAX_NUM_F64_e64, f64>;
let SubtargetPredicate = NotHasTrue16BitInsts in
def : ClampPat<V_MAX_F16_e64, f16>;
@ -3671,13 +3671,13 @@ multiclass SelectCanonicalizeAsMax<
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MAX_F64_e64 $src_mods, $src, $src_mods, $src)> {
let OtherPredicates = !listconcat(f64_preds, [isNotGFX12Plus]);
let OtherPredicates = !listconcat(f64_preds, [NotHasIEEEMinimumMaximumInsts]);
}
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MAX_NUM_F64_e64 $src_mods, $src, $src_mods, $src)> {
let OtherPredicates = !listconcat(f64_preds, [isGFX12Plus]);
let OtherPredicates = !listconcat(f64_preds, [HasIEEEMinimumMaximumInsts]);
}
def : GCNPat<

View File

@ -641,6 +641,8 @@ unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
return SIEncodingFamily::GFX1250;
if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
return SIEncodingFamily::GFX12;
if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
return SIEncodingFamily::GFX1170;
if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
return SIEncodingFamily::GFX11;
llvm_unreachable("Subtarget generation does not support VOPD!");

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s
define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
@ -9,6 +10,12 @@ define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_f32_known_nnan_ieee_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_f32_known_nnan_ieee_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -30,6 +37,12 @@ define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_f16_known_nnan_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_f16_known_nnan_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -53,6 +66,14 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
; GFX10-NEXT: v_min_f32_e64 v0, 0x41200000, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -77,6 +98,12 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -100,6 +127,12 @@ define float @test_fmed3_global_nnan(float %a) #3 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_global_nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_global_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -127,6 +160,12 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -151,6 +190,14 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -175,6 +222,12 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -197,6 +250,12 @@ define float @test_fmed3_unknown_input_ieee_true_dx10clamp_true(float %a) #2 {
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_fmed3_unknown_input_ieee_true_dx10clamp_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_unknown_input_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s
define float @test_min_max_ValK0_K1_f32(float %a) #0 {
@ -9,6 +10,12 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_ValK0_K1_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_ValK0_K1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -31,6 +38,12 @@ define double @test_min_max_K0Val_K1_f64(double %a) #1 {
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_K0Val_K1_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0Val_K1_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -54,6 +67,12 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_K1max_ValK0_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_ValK0_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -76,6 +95,12 @@ define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_K1max_K0Val_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_K0Val_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -98,6 +123,12 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_splat_padded_with_undef:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -122,6 +153,12 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_min_ValK1_K0_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_ValK1_K0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -144,6 +181,12 @@ define double @test_max_min_K1Val_K0_f64(double %a) #1 {
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_min_K1Val_K0_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K1Val_K0_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -166,6 +209,12 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_K0min_ValK1_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_ValK1_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -189,6 +238,12 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_K0min_K1Val_v2f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_K1Val_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -213,6 +268,12 @@ define float @test_min_max_global_nnan(float %a) {
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_global_nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_global_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -234,6 +295,12 @@ define float @test_max_min_global_nnan(float %a) {
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_min_global_nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_global_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -261,6 +328,12 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_min_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_K0_gt_K1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 1.0, 0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0_gt_K1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -284,6 +357,12 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_min_K0_gt_K1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0, 1.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K0_gt_K1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -310,6 +389,12 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -334,6 +419,12 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -360,6 +451,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -386,6 +485,14 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -1,20 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s
define float @test_min_max_ValK0_K1_f32(float %a) #0 {
; GFX8-LABEL: test_min_max_ValK0_K1_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_ValK0_K1_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_ValK0_K1_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_ValK0_K1_f32:
; GFX12: ; %bb.0:
@ -31,17 +38,23 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 {
}
define float @test_min_max_K0Val_K1_f32(float %a) #1 {
; GFX8-LABEL: test_min_max_K0Val_K1_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_K0Val_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_K0Val_K1_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_K0Val_K1_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0Val_K1_f32:
; GFX12: ; %bb.0:
@ -60,13 +73,6 @@ define float @test_min_max_K0Val_K1_f32(float %a) #1 {
; min-max patterns for ieee=true do not have to check for NaNs
; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
define half @test_min_K1max_ValK0_f16(half %a) #0 {
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_K1max_ValK0_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -75,6 +81,21 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 {
; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_K1max_ValK0_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_ValK0_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -92,12 +113,6 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 {
}
define half @test_min_K1max_K0Val_f16(half %a) #1 {
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_K1max_K0Val_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -105,6 +120,18 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 {
; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_K1max_K0Val_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_K0Val_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -121,17 +148,23 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 {
; max-min patterns work only for non-NaN inputs
define float @test_max_min_ValK1_K0_f32(float %a) #0 {
; GFX8-LABEL: test_max_min_ValK1_K0_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_ValK1_K0_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_max_min_ValK1_K0_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_ValK1_K0_f32:
; GFX12: ; %bb.0:
@ -148,17 +181,23 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 {
}
define float @test_max_min_K1Val_K0_f32(float %a) #1 {
; GFX8-LABEL: test_max_min_K1Val_K0_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K1Val_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_K1Val_K0_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_max_min_K1Val_K0_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K1Val_K0_f32:
; GFX12: ; %bb.0:
@ -175,12 +214,6 @@ define float @test_max_min_K1Val_K0_f32(float %a) #1 {
}
define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_K0min_ValK1_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -188,6 +221,18 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX8-NEXT: v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_K0min_ValK1_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_ValK1_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -203,12 +248,6 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
}
define half @test_max_K0min_K1Val_f16(half %a) #1 {
; GFX10-LABEL: test_max_K0min_K1Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_K0min_K1Val_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -216,6 +255,18 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; GFX8-NEXT: v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_max_K0min_K1Val_f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_K1Val_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -233,17 +284,23 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; global nnan function attribute always forces fmed3 combine
define float @test_min_max_global_nnan(float %a) {
; GFX8-LABEL: test_min_max_global_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_global_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_global_nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_global_nnan:
; GFX12: ; %bb.0:
@ -260,17 +317,23 @@ define float @test_min_max_global_nnan(float %a) {
}
define float @test_max_min_global_nnan(float %a) {
; GFX8-LABEL: test_max_min_global_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_global_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_max_min_global_nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_global_nnan:
; GFX12: ; %bb.0:
@ -292,6 +355,13 @@ define float @test_max_min_global_nnan(float %a) {
; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX8-LABEL: test_min_max_K0_gt_K1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -299,12 +369,11 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_K0_gt_K1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_K0_gt_K1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 4.0, 2.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0_gt_K1:
; GFX12: ; %bb.0:
@ -322,6 +391,13 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
; max(min(Val, K1), K0) with K0 > K1; forming fmed3 requires K0 <= K1
define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX8-LABEL: test_max_min_K0_gt_K1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -329,12 +405,11 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_K0_gt_K1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_max_min_K0_gt_K1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K0_gt_K1:
; GFX12: ; %bb.0:
@ -352,6 +427,13 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; non-inline constant
define float @test_min_max_non_inline_const(float %a) #0 {
; GFX8-LABEL: test_min_max_non_inline_const:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 0x41000000, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_non_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -359,12 +441,11 @@ define float @test_min_max_non_inline_const(float %a) #0 {
; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_non_inline_const:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 0x41000000, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_non_inline_const:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 2.0, 0x41000000
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_non_inline_const:
; GFX12: ; %bb.0:
@ -383,6 +464,13 @@ define float @test_min_max_non_inline_const(float %a) #0 {
; there is no fmed3 for f64 or v2f16 types
define double @test_min_max_f64(double %a) #0 {
; GFX8-LABEL: test_min_max_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -390,12 +478,13 @@ define double @test_min_max_f64(double %a) #0 {
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], 2.0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_f64:
; GFX12: ; %bb.0:
@ -414,13 +503,6 @@ define double @test_min_max_f64(double %a) #0 {
}
define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; GFX10-LABEL: test_min_max_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -433,6 +515,21 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_min_max_v2f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -453,6 +550,13 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; min-max patterns for ieee=false require known non-NaN input
define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -460,12 +564,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
@ -486,6 +591,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; max-min patterns always require known non-NaN input
define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -493,12 +605,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
@ -518,6 +631,14 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
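; The leading 'v_max_f32_e32 v0, v0, v0' (emitted as 'v_mul_f32_e32 v0, 1.0, v0' on GFX8 and
; 'v_max_num_f32_e32 v0, v0, v0' on GFX1170) is from fcanonicalize of the input to fmin/fmax with ieee=true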
define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -526,13 +647,13 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX12: ; %bb.0:

View File

@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
define float @v_rsq_clamp_f32(float %src) #0 {
@ -18,6 +19,15 @@ define float @v_rsq_clamp_f32(float %src) #0 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f32_e32 v0, v0
; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -49,6 +59,15 @@ define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_fabs_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f32_e64 v0, |v0|
; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_fabs_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -85,6 +104,19 @@ define double @v_rsq_clamp_f64(double %src) #0 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -124,6 +156,19 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_fabs_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_fabs_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -160,6 +205,15 @@ define float @v_rsq_clamp_undef_f32() #0 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_undef_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f32_e32 v0, s0
; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_undef_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -196,6 +250,19 @@ define double @v_rsq_clamp_undef_f64() #0 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_undef_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_undef_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -231,6 +298,15 @@ define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_f32_non_ieee:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f32_e32 v0, v0
; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f32_non_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -266,6 +342,19 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_rsq_clamp_f64_non_ieee:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: v_mov_b32_e32 v2, -1
; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f64_non_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -4,6 +4,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-TRUE16 %s
@ -130,6 +132,36 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX1170-LABEL: test_fmax3_olt_0_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-NEXT: s_mov_b32 s10, -1
; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-NEXT: s_mov_b32 s14, s10
; GFX1170-NEXT: s_mov_b32 s15, s11
; GFX1170-NEXT: s_mov_b32 s18, s10
; GFX1170-NEXT: s_mov_b32 s19, s11
; GFX1170-NEXT: s_mov_b32 s22, s10
; GFX1170-NEXT: s_mov_b32 s23, s11
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s2
; GFX1170-NEXT: s_mov_b32 s13, s3
; GFX1170-NEXT: s_mov_b32 s16, s4
; GFX1170-NEXT: s_mov_b32 s17, s5
; GFX1170-NEXT: s_mov_b32 s20, s6
; GFX1170-NEXT: s_mov_b32 s21, s7
; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s8, s0
; GFX1170-NEXT: s_mov_b32 s9, s1
; GFX1170-NEXT: v_max3_num_f32 v0, v0, v1, v2
; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX1170-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmax3_olt_0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -321,6 +353,36 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX1170-LABEL: test_fmax3_olt_1_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-NEXT: s_mov_b32 s10, -1
; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-NEXT: s_mov_b32 s14, s10
; GFX1170-NEXT: s_mov_b32 s15, s11
; GFX1170-NEXT: s_mov_b32 s18, s10
; GFX1170-NEXT: s_mov_b32 s19, s11
; GFX1170-NEXT: s_mov_b32 s22, s10
; GFX1170-NEXT: s_mov_b32 s23, s11
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s2
; GFX1170-NEXT: s_mov_b32 s13, s3
; GFX1170-NEXT: s_mov_b32 s16, s4
; GFX1170-NEXT: s_mov_b32 s17, s5
; GFX1170-NEXT: s_mov_b32 s20, s6
; GFX1170-NEXT: s_mov_b32 s21, s7
; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s8, s0
; GFX1170-NEXT: s_mov_b32 s9, s1
; GFX1170-NEXT: v_max3_num_f32 v0, v2, v0, v1
; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX1170-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmax3_olt_1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -552,6 +614,66 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX1170-TRUE16-LABEL: test_fmax3_olt_0_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1170-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v0.h, v1.l
; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-TRUE16-NEXT: s_endpgm
;
; GFX1170-FAKE16-LABEL: test_fmax3_olt_0_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1170-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v2
; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-FAKE16-NEXT: s_endpgm
;
; GFX12-TRUE16-LABEL: test_fmax3_olt_0_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -845,6 +967,66 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX1170-TRUE16-LABEL: test_fmax3_olt_1_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1170-TRUE16-NEXT: v_max3_num_f16 v0.l, v1.l, v0.l, v0.h
; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-TRUE16-NEXT: s_endpgm
;
; GFX1170-FAKE16-LABEL: test_fmax3_olt_1_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1170-FAKE16-NEXT: v_max3_num_f16 v0, v2, v0, v1
; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-FAKE16-NEXT: s_endpgm
;
; GFX12-TRUE16-LABEL: test_fmax3_olt_1_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -1042,6 +1224,15 @@ define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; GFX11-NEXT: v_pk_max_f16 v0, v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: no_fmax3_v2f16:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_pk_max_num_f16 v0, v2, v0
; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: no_fmax3_v2f16:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@ -15,6 +19,11 @@ define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_f32_vv:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f32_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@ -44,6 +53,11 @@ define amdgpu_ps float @test_fmaximum_f32_ss(float inreg %a, float inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_f32_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, s0, s1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_maximum_f32 s0, s0, s1
@ -63,6 +77,11 @@ define amdgpu_ps float @test_fmaximum_f32_vs(float %a, float inreg %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_f32_vs:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, s0
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f32_vs:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, s0
@ -77,6 +96,11 @@ define amdgpu_ps float @test_fmaximum_nnan_f32(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_nnan_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_nnan_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@ -94,6 +118,11 @@ define amdgpu_ps float @test_fmaximum_nsz_f32(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_nsz_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_nsz_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@ -108,6 +137,11 @@ define amdgpu_ps float @test_fmaximum_signed_zero_f32() {
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_signed_zero_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_signed_zero_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v0, 0
@ -128,6 +162,12 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v2f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v2
@ -169,6 +209,12 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32_ss(<2 x float> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v2f32_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, s0, s2
; GFX1170-NEXT: v_maximum_f32 v1, s1, s3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v2f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_maximum_f32 s0, s0, s2
@ -195,6 +241,13 @@ define amdgpu_ps <3 x float> @test_fmaximum_v3f32(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v3f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v3
@ -223,6 +276,14 @@ define amdgpu_ps <4 x float> @test_fmaximum_v4f32(<4 x float> %a, <4 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v4f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v4
@ -288,6 +349,26 @@ define amdgpu_ps <16 x float> @test_fmaximum_v16f32(<16 x float> %a, <16 x float
; GFX9-NEXT: v_cndmask_b32_e32 v15, v33, v16, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v16f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v16
; GFX1170-NEXT: v_maximum_f32 v1, v1, v17
; GFX1170-NEXT: v_maximum_f32 v2, v2, v18
; GFX1170-NEXT: v_maximum_f32 v3, v3, v19
; GFX1170-NEXT: v_maximum_f32 v4, v4, v20
; GFX1170-NEXT: v_maximum_f32 v5, v5, v21
; GFX1170-NEXT: v_maximum_f32 v6, v6, v22
; GFX1170-NEXT: v_maximum_f32 v7, v7, v23
; GFX1170-NEXT: v_maximum_f32 v8, v8, v24
; GFX1170-NEXT: v_maximum_f32 v9, v9, v25
; GFX1170-NEXT: v_maximum_f32 v10, v10, v26
; GFX1170-NEXT: v_maximum_f32 v11, v11, v27
; GFX1170-NEXT: v_maximum_f32 v12, v12, v28
; GFX1170-NEXT: v_maximum_f32 v13, v13, v29
; GFX1170-NEXT: v_maximum_f32 v14, v14, v30
; GFX1170-NEXT: v_maximum_f32 v15, v15, v31
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v16
@ -320,6 +401,26 @@ define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-FAKE16-LABEL: test_fmaximum_f16_vv:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_f16_vv:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_f16_vv:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
@ -364,6 +465,26 @@ define amdgpu_ps half @test_fmaximum_f16_ss(half inreg %a, half inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-TRUE16-LABEL: test_fmaximum_f16_ss:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-FAKE16-LABEL: test_fmaximum_f16_ss:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_f16_ss:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_f16_ss:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_maximum_f16 s0, s0, s1
@ -399,6 +520,11 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_vv(<2 x half> %a, <2 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v2f16_vv:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v2f16_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
@ -448,6 +574,11 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v2f16_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v2f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s1
@ -490,6 +621,27 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fmaximum_v3f16_vv:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_vv:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, v1.l, v3.l
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_vv:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, v1, v3
; GFX1170-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fmaximum_v3f16_vv:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_maximum_f16 v0, v0, v2
@ -567,6 +719,30 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fmaximum_v3f16_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v0, s0, s2
; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v1, s1, s3
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_ss:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, s1, s3
; GFX1170-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, s0, s2
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v1
; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_ss:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, s1, s3
; GFX1170-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, s0, s2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v1
; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s0
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fmaximum_v3f16_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_maximum_f16 v0, s0, s2
@ -624,6 +800,12 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16(<4 x half> %a, <4 x half> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v4f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
@ -706,6 +888,12 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16_ss(<4 x half> inreg %a, <4 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v4f16_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s2
; GFX1170-NEXT: v_pk_maximum_f16 v1, s1, s3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v4f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s2
@ -734,6 +922,11 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_vv(double %a, double %b) {
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_f64_vv:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f64_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
@ -771,6 +964,21 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_ss(double inreg %a, double inreg
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fmaximum_f64_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-LABEL: test_fmaximum_f64_ss:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1170-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fmaximum_f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
@ -835,6 +1043,27 @@ define amdgpu_ps <4 x float> @test_fmaximum_v2f64_ss(<2 x double> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fmaximum_v2f64_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5]
; GFX1170-SDAG-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7]
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-LABEL: test_fmaximum_v2f64_ss:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5]
; GFX1170-GISEL-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX1170-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fmaximum_v2f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5]
@ -904,6 +1133,14 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64(<4 x double> %a, <4 x double>
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v7, v18, v13, s[4:5]
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_v4f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
@ -992,6 +1229,36 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64_ss(<4 x double> inreg %a, <4 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, s5
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fmaximum_v4f64_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9]
; GFX1170-SDAG-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11]
; GFX1170-SDAG-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13]
; GFX1170-SDAG-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15]
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-LABEL: test_fmaximum_v4f64_ss:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9]
; GFX1170-GISEL-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11]
; GFX1170-GISEL-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13]
; GFX1170-GISEL-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s4, v4
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s5, v5
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s6, v6
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s7, v7
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GFX1170-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fmaximum_v4f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9]
@ -1063,6 +1330,21 @@ define amdgpu_kernel void @fmaximumi_f32_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX1170-LABEL: fmaximumi_f32_move_to_valu:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_clause 0x1
; GFX1170-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v1, v1, v2
; GFX1170-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1170-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: fmaximumi_f32_move_to_valu:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_clause 0x1
@ -1143,6 +1425,69 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX1170-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: s_clause 0x1
; GFX1170-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-SDAG-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX1170-SDAG-TRUE16-NEXT: s_endpgm
;
; GFX1170-SDAG-FAKE16-LABEL: fmaximum_f16_move_to_valu:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: s_clause 0x1
; GFX1170-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-SDAG-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v1, v1, v2
; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX1170-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1170-GISEL-TRUE16-LABEL: fmaximum_f16_move_to_valu:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: s_clause 0x1
; GFX1170-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-GISEL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX1170-GISEL-TRUE16-NEXT: s_endpgm
;
; GFX1170-GISEL-FAKE16-LABEL: fmaximum_f16_move_to_valu:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: s_clause 0x1
; GFX1170-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s3, v2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, s2, s3
; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX1170-GISEL-FAKE16-NEXT: s_endpgm
;
; GFX12-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1
@ -1226,6 +1571,11 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_on(float %a, float %b) #0 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_f32_ieee_on:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f32_ieee_on:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@ -1243,6 +1593,11 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_off(float %a, float %b) #1 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fmaximum_f32_ieee_off:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fmaximum_f32_ieee_off:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-TRUE16 %s
@ -130,6 +132,36 @@ define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX1170-LABEL: test_fmin3_olt_0_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-NEXT: s_mov_b32 s10, -1
; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-NEXT: s_mov_b32 s14, s10
; GFX1170-NEXT: s_mov_b32 s15, s11
; GFX1170-NEXT: s_mov_b32 s18, s10
; GFX1170-NEXT: s_mov_b32 s19, s11
; GFX1170-NEXT: s_mov_b32 s22, s10
; GFX1170-NEXT: s_mov_b32 s23, s11
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s2
; GFX1170-NEXT: s_mov_b32 s13, s3
; GFX1170-NEXT: s_mov_b32 s16, s4
; GFX1170-NEXT: s_mov_b32 s17, s5
; GFX1170-NEXT: s_mov_b32 s20, s6
; GFX1170-NEXT: s_mov_b32 s21, s7
; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s8, s0
; GFX1170-NEXT: s_mov_b32 s9, s1
; GFX1170-NEXT: v_min3_num_f32 v0, v0, v1, v2
; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX1170-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmin3_olt_0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -321,6 +353,36 @@ define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX1170-LABEL: test_fmin3_olt_1_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-NEXT: s_mov_b32 s10, -1
; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-NEXT: s_mov_b32 s14, s10
; GFX1170-NEXT: s_mov_b32 s15, s11
; GFX1170-NEXT: s_mov_b32 s18, s10
; GFX1170-NEXT: s_mov_b32 s19, s11
; GFX1170-NEXT: s_mov_b32 s22, s10
; GFX1170-NEXT: s_mov_b32 s23, s11
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s2
; GFX1170-NEXT: s_mov_b32 s13, s3
; GFX1170-NEXT: s_mov_b32 s16, s4
; GFX1170-NEXT: s_mov_b32 s17, s5
; GFX1170-NEXT: s_mov_b32 s20, s6
; GFX1170-NEXT: s_mov_b32 s21, s7
; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s8, s0
; GFX1170-NEXT: s_mov_b32 s9, s1
; GFX1170-NEXT: v_min3_num_f32 v0, v2, v0, v1
; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX1170-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmin3_olt_1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -552,6 +614,66 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX1170-TRUE16-LABEL: test_fmin3_olt_0_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1170-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v0.h, v1.l
; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-TRUE16-NEXT: s_endpgm
;
; GFX1170-FAKE16-LABEL: test_fmin3_olt_0_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1170-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v2
; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-FAKE16-NEXT: s_endpgm
;
; GFX12-TRUE16-LABEL: test_fmin3_olt_0_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -845,6 +967,66 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX1170-TRUE16-LABEL: test_fmin3_olt_1_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
; GFX1170-TRUE16-NEXT: v_min3_num_f16 v0.l, v1.l, v0.l, v0.h
; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-TRUE16-NEXT: s_endpgm
;
; GFX1170-FAKE16-LABEL: test_fmin3_olt_1_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
; GFX1170-FAKE16-NEXT: v_min3_num_f16 v0, v2, v0, v1
; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX1170-FAKE16-NEXT: s_endpgm
;
; GFX12-TRUE16-LABEL: test_fmin3_olt_1_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -1042,6 +1224,15 @@ define <2 x half> @no_fmin3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; GFX11-NEXT: v_pk_min_f16 v0, v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: no_fmin3_v2f16:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_pk_min_num_f16 v0, v2, v0
; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: no_fmin3_v2f16:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1204,6 +1395,39 @@ define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX1170-LABEL: test_fmin3_olt_0_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-NEXT: s_mov_b32 s10, -1
; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-NEXT: s_mov_b32 s14, s10
; GFX1170-NEXT: s_mov_b32 s15, s11
; GFX1170-NEXT: s_mov_b32 s18, s10
; GFX1170-NEXT: s_mov_b32 s19, s11
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s2
; GFX1170-NEXT: s_mov_b32 s13, s3
; GFX1170-NEXT: s_mov_b32 s16, s4
; GFX1170-NEXT: s_mov_b32 s17, s5
; GFX1170-NEXT: buffer_load_b64 v[0:1], off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b64 v[2:3], off, s[16:19], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s6
; GFX1170-NEXT: s_mov_b32 s13, s7
; GFX1170-NEXT: s_mov_b32 s8, s0
; GFX1170-NEXT: buffer_load_b64 v[4:5], off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s9, s1
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: v_max_num_f64 v[2:3], v[4:5], v[4:5]
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
; GFX1170-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmin3_olt_0_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@ -1414,6 +1638,39 @@ define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX1170-LABEL: test_fmin3_olt_1_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX1170-NEXT: s_mov_b32 s10, -1
; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
; GFX1170-NEXT: s_mov_b32 s14, s10
; GFX1170-NEXT: s_mov_b32 s15, s11
; GFX1170-NEXT: s_mov_b32 s18, s10
; GFX1170-NEXT: s_mov_b32 s19, s11
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s2
; GFX1170-NEXT: s_mov_b32 s13, s3
; GFX1170-NEXT: s_mov_b32 s16, s4
; GFX1170-NEXT: s_mov_b32 s17, s5
; GFX1170-NEXT: buffer_load_b64 v[0:1], off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: buffer_load_b64 v[2:3], off, s[16:19], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s12, s6
; GFX1170-NEXT: s_mov_b32 s13, s7
; GFX1170-NEXT: s_mov_b32 s8, s0
; GFX1170-NEXT: buffer_load_b64 v[4:5], off, s[12:15], 0 glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_mov_b32 s9, s1
; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: v_max_num_f64 v[2:3], v[4:5], v[4:5]
; GFX1170-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1]
; GFX1170-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
; GFX1170-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmin3_olt_1_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24

View File

@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@ -15,6 +19,11 @@ define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_f32_vv:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f32_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@ -44,6 +53,11 @@ define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_f32_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, s0, s1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f32 s0, s0, s1
@ -63,6 +77,11 @@ define amdgpu_ps float @test_fminimum_f32_vs(float %a, float inreg %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_f32_vs:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, s0
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f32_vs:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, s0
@ -77,6 +96,11 @@ define amdgpu_ps float @test_fminimum_nnan_f32(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_nnan_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_nnan_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@ -94,6 +118,11 @@ define amdgpu_ps float @test_fminimum_nsz_f32(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_nsz_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_nsz_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@ -108,6 +137,11 @@ define amdgpu_ps float @test_fminimum_signed_zero_f32() {
; GFX9-NEXT: v_bfrev_b32_e32 v0, 1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_signed_zero_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_bfrev_b32_e32 v0, 1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_signed_zero_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_bfrev_b32_e32 v0, 1
@ -128,6 +162,12 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v2f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v2
@ -169,6 +209,12 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v2f32_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, s0, s2
; GFX1170-NEXT: v_minimum_f32 v1, s1, s3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v2f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f32 s0, s0, s2
@ -195,6 +241,13 @@ define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v3f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v3
@ -223,6 +276,14 @@ define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v4f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v4
@ -288,6 +349,26 @@ define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float
; GFX9-NEXT: v_cndmask_b32_e32 v15, v33, v16, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v16f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v16
; GFX1170-NEXT: v_minimum_f32 v1, v1, v17
; GFX1170-NEXT: v_minimum_f32 v2, v2, v18
; GFX1170-NEXT: v_minimum_f32 v3, v3, v19
; GFX1170-NEXT: v_minimum_f32 v4, v4, v20
; GFX1170-NEXT: v_minimum_f32 v5, v5, v21
; GFX1170-NEXT: v_minimum_f32 v6, v6, v22
; GFX1170-NEXT: v_minimum_f32 v7, v7, v23
; GFX1170-NEXT: v_minimum_f32 v8, v8, v24
; GFX1170-NEXT: v_minimum_f32 v9, v9, v25
; GFX1170-NEXT: v_minimum_f32 v10, v10, v26
; GFX1170-NEXT: v_minimum_f32 v11, v11, v27
; GFX1170-NEXT: v_minimum_f32 v12, v12, v28
; GFX1170-NEXT: v_minimum_f32 v13, v13, v29
; GFX1170-NEXT: v_minimum_f32 v14, v14, v30
; GFX1170-NEXT: v_minimum_f32 v15, v15, v31
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v16
@ -320,6 +401,26 @@ define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-TRUE16-LABEL: test_fminimum_f16_vv:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-FAKE16-LABEL: test_fminimum_f16_vv:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_f16_vv:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_f16_vv:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-TRUE16-LABEL: test_fminimum_f16_vv:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
@ -364,6 +465,26 @@ define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-TRUE16-LABEL: test_fminimum_f16_ss:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-FAKE16-LABEL: test_fminimum_f16_ss:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_f16_ss:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_f16_ss:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f16 s0, s0, s1
@ -399,6 +520,11 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v2f16_vv:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v2f16_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
@ -448,6 +574,11 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v2f16_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v2f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s1
@ -490,6 +621,27 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fminimum_v3f16_vv:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_v3f16_vv:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v1.l, v3.l
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_v3f16_vv:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3
; GFX1170-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fminimum_v3f16_vv:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_minimum_f16 v0, v0, v2
@ -567,6 +719,30 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fminimum_v3f16_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v0, s0, s2
; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v1, s1, s3
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_v3f16_ss:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, s1, s3
; GFX1170-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, s0, s2
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v1
; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, s0
; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_v3f16_ss:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, s1, s3
; GFX1170-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, s0, s2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v1
; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s0
; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fminimum_v3f16_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_minimum_f16 v0, s0, s2
@ -624,6 +800,12 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v4f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
@ -706,6 +888,12 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v4f16_ss:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s2
; GFX1170-NEXT: v_pk_minimum_f16 v1, s1, s3
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v4f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s2
@ -734,6 +922,11 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) {
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_f64_vv:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f64_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
@ -771,6 +964,21 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fminimum_f64_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-LABEL: test_fminimum_f64_ss:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1170-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fminimum_f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
@ -835,6 +1043,27 @@ define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fminimum_v2f64_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7]
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-LABEL: test_fminimum_v2f64_ss:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX1170-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fminimum_v2f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5]
@ -904,6 +1133,14 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double>
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v7, v18, v13, s[4:5]
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_v4f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
@ -992,6 +1229,36 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, s5
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX1170-SDAG-LABEL: test_fminimum_v4f64_ss:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15]
; GFX1170-SDAG-NEXT: ; return to shader part epilog
;
; GFX1170-GISEL-LABEL: test_fminimum_v4f64_ss:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s4, v4
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s5, v5
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s6, v6
; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s7, v7
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GFX1170-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: test_fminimum_v4f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9]
@ -1063,6 +1330,21 @@ define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX1170-LABEL: fminimumi_f32_move_to_valu:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_clause 0x1
; GFX1170-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v1, v1, v2
; GFX1170-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1170-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: fminimumi_f32_move_to_valu:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_clause 0x1
@ -1143,6 +1425,69 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX1170-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: s_clause 0x1
; GFX1170-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-SDAG-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX1170-SDAG-TRUE16-NEXT: s_endpgm
;
; GFX1170-SDAG-FAKE16-LABEL: fminimum_f16_move_to_valu:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: s_clause 0x1
; GFX1170-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-SDAG-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v1, v1, v2
; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX1170-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX1170-GISEL-TRUE16-LABEL: fminimum_f16_move_to_valu:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: s_clause 0x1
; GFX1170-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-GISEL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX1170-GISEL-TRUE16-NEXT: s_endpgm
;
; GFX1170-GISEL-FAKE16-LABEL: fminimum_f16_move_to_valu:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: s_clause 0x1
; GFX1170-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1170-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s3, v2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, s2, s3
; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX1170-GISEL-FAKE16-NEXT: s_endpgm
;
; GFX12-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1
@ -1226,6 +1571,11 @@ define amdgpu_ps float @test_fminimum_f32_ieee_on(float %a, float %b) #0 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_f32_ieee_on:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f32_ieee_on:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@ -1243,6 +1593,11 @@ define amdgpu_ps float @test_fminimum_f32_ieee_off(float %a, float %b) #1 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_fminimum_f32_ieee_off:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_fminimum_f32_ieee_off:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_maximum_f16(half %src0, half %src1) {
; GFX7-LABEL: v_maximum_f16:
@ -72,6 +74,18 @@ define half @v_maximum_f16(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_maximum_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_maximum_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_maximum_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -135,6 +149,18 @@ define half @v_maximum_f16__nnan(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -221,6 +247,18 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_maximum_f16__nsz:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_maximum_f16__nsz:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_maximum_f16__nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -284,6 +322,18 @@ define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_nsz:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_nsz:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -379,6 +429,22 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_src0:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_src0:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -479,6 +545,22 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_src1:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_src1:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -601,6 +683,28 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: s_maximum_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1170-TRUE16-NEXT: ;;#ASMSTART
; GFX1170-TRUE16-NEXT: ; use v0
; GFX1170-TRUE16-NEXT: ;;#ASMEND
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: s_maximum_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1170-FAKE16-NEXT: ;;#ASMSTART
; GFX1170-FAKE16-NEXT: ; use v0
; GFX1170-FAKE16-NEXT: ;;#ASMEND
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_maximum_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -720,6 +824,12 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -777,6 +887,12 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f16__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -886,6 +1002,12 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f16__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -943,6 +1065,12 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1089,6 +1217,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_maximum_v2f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s1
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v0
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_maximum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1227,6 +1364,13 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1293,6 +1437,13 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f16__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1427,6 +1578,13 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f16__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1493,6 +1651,13 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1658,6 +1823,13 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1734,6 +1906,13 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f16__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1899,6 +2078,13 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f16__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1975,6 +2161,13 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2251,6 +2444,15 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v3, v9, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v8f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX1170-NEXT: v_pk_maximum_f16 v2, v2, v6
; GFX1170-NEXT: v_pk_maximum_f16 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2755,6 +2957,19 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v4, v4, v14, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v16f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v8
; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v9
; GFX1170-NEXT: v_pk_maximum_f16 v2, v2, v10
; GFX1170-NEXT: v_pk_maximum_f16 v3, v3, v11
; GFX1170-NEXT: v_pk_maximum_f16 v4, v4, v12
; GFX1170-NEXT: v_pk_maximum_f16 v5, v5, v13
; GFX1170-NEXT: v_pk_maximum_f16 v6, v6, v14
; GFX1170-NEXT: v_pk_maximum_f16 v7, v7, v15
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2774,5 +2989,3 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
%op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1)
ret <16 x half> %op
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}

View File

@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define float @v_maximum_f32(float %src0, float %src1) {
; GFX7-LABEL: v_maximum_f32:
@ -59,6 +59,12 @@ define float @v_maximum_f32(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -103,6 +109,12 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -167,6 +179,12 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -211,6 +229,12 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -281,6 +305,14 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f32__nnan_src0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f32_e32 v0, 1.0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f32__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -354,6 +386,14 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f32__nnan_src1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f32_e32 v1, 1.0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f32__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -443,6 +483,15 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_maximum_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, s0, s1
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v0
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_maximum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -527,6 +576,13 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -576,6 +632,13 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -656,6 +719,13 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -705,6 +775,13 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -812,6 +889,16 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_maximum_v2f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v1, s1, s3
; GFX1170-NEXT: v_maximum_f32 v0, s0, s2
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v[0:1]
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_maximum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -913,6 +1000,14 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -968,6 +1063,14 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1065,6 +1168,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1120,6 +1231,14 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1233,6 +1352,15 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1293,6 +1421,15 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1407,6 +1544,15 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1467,6 +1613,15 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1643,6 +1798,19 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v8f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v8
; GFX1170-NEXT: v_maximum_f32 v1, v1, v9
; GFX1170-NEXT: v_maximum_f32 v2, v2, v10
; GFX1170-NEXT: v_maximum_f32 v3, v3, v11
; GFX1170-NEXT: v_maximum_f32 v4, v4, v12
; GFX1170-NEXT: v_maximum_f32 v5, v5, v13
; GFX1170-NEXT: v_maximum_f32 v6, v6, v14
; GFX1170-NEXT: v_maximum_f32 v7, v7, v15
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1960,6 +2128,29 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v16f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: v_maximum_f32 v0, v0, v16
; GFX1170-NEXT: v_maximum_f32 v1, v1, v17
; GFX1170-NEXT: v_maximum_f32 v2, v2, v18
; GFX1170-NEXT: v_maximum_f32 v3, v3, v19
; GFX1170-NEXT: v_maximum_f32 v4, v4, v20
; GFX1170-NEXT: v_maximum_f32 v5, v5, v21
; GFX1170-NEXT: v_maximum_f32 v6, v6, v22
; GFX1170-NEXT: v_maximum_f32 v7, v7, v23
; GFX1170-NEXT: v_maximum_f32 v8, v8, v24
; GFX1170-NEXT: v_maximum_f32 v9, v9, v25
; GFX1170-NEXT: v_maximum_f32 v10, v10, v26
; GFX1170-NEXT: v_maximum_f32 v11, v11, v27
; GFX1170-NEXT: v_maximum_f32 v12, v12, v28
; GFX1170-NEXT: v_maximum_f32 v13, v13, v29
; GFX1170-NEXT: v_maximum_f32 v14, v14, v30
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v15, v15, v31
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1989,5 +2180,3 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
%op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1)
ret <16 x float> %op
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}

View File

@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define double @v_maximum_f64(double %src0, double %src1) {
; GFX7-LABEL: v_maximum_f64:
@ -69,6 +69,12 @@ define double @v_maximum_f64(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -113,6 +119,12 @@ define double @v_maximum_f64__nnan(double %src0, double %src1) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -187,6 +199,12 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -231,6 +249,12 @@ define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -312,6 +336,14 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f64__nnan_src0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f64__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -396,6 +428,14 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_f64__nnan_src1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_f64__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -498,6 +538,15 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_maximum_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v[0:1]
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_maximum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -601,6 +650,13 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -651,6 +707,13 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -751,6 +814,13 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -801,6 +871,13 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -932,6 +1009,16 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_maximum_v2f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[2:3], s[2:3], s[18:19]
; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[16:17]
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v[0:3]
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_maximum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1060,6 +1147,14 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1116,6 +1211,14 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1241,6 +1344,14 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1297,6 +1408,14 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1447,6 +1566,15 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1509,6 +1637,15 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1660,6 +1797,15 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1722,6 +1868,15 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1982,6 +2137,21 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v8f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17]
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[18:19]
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[20:21]
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[22:23]
; GFX1170-NEXT: v_maximum_f64 v[8:9], v[8:9], v[24:25]
; GFX1170-NEXT: v_maximum_f64 v[10:11], v[10:11], v[26:27]
; GFX1170-NEXT: v_maximum_f64 v[12:13], v[12:13], v[28:29]
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[14:15], v[14:15], v[30:31]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2757,6 +2927,79 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_maximum_v16f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: s_clause 0x1b
; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:12
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: scratch_load_b32 v37, off, s32 offset:120
; GFX1170-NEXT: scratch_load_b32 v39, off, s32 offset:104
; GFX1170-NEXT: scratch_load_b32 v49, off, s32 offset:24
; GFX1170-NEXT: scratch_load_b32 v48, off, s32 offset:20
; GFX1170-NEXT: scratch_load_b32 v51, off, s32 offset:32
; GFX1170-NEXT: scratch_load_b32 v50, off, s32 offset:28
; GFX1170-NEXT: scratch_load_b32 v53, off, s32 offset:40
; GFX1170-NEXT: scratch_load_b32 v52, off, s32 offset:36
; GFX1170-NEXT: scratch_load_b32 v55, off, s32 offset:48
; GFX1170-NEXT: scratch_load_b32 v54, off, s32 offset:44
; GFX1170-NEXT: scratch_load_b32 v65, off, s32 offset:56
; GFX1170-NEXT: scratch_load_b32 v64, off, s32 offset:52
; GFX1170-NEXT: scratch_load_b32 v67, off, s32 offset:64
; GFX1170-NEXT: scratch_load_b32 v66, off, s32 offset:60
; GFX1170-NEXT: scratch_load_b32 v69, off, s32 offset:72
; GFX1170-NEXT: scratch_load_b32 v68, off, s32 offset:68
; GFX1170-NEXT: scratch_load_b32 v71, off, s32 offset:80
; GFX1170-NEXT: scratch_load_b32 v70, off, s32 offset:76
; GFX1170-NEXT: scratch_load_b32 v81, off, s32 offset:88
; GFX1170-NEXT: scratch_load_b32 v80, off, s32 offset:84
; GFX1170-NEXT: scratch_load_b32 v83, off, s32 offset:96
; GFX1170-NEXT: scratch_load_b32 v82, off, s32 offset:92
; GFX1170-NEXT: scratch_load_b32 v38, off, s32 offset:100
; GFX1170-NEXT: s_waitcnt vmcnt(26)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33]
; GFX1170-NEXT: s_clause 0x2
; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:112
; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:108
; GFX1170-NEXT: scratch_load_b32 v36, off, s32 offset:116
; GFX1170-NEXT: s_waitcnt vmcnt(27)
; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35]
; GFX1170-NEXT: s_clause 0x1
; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:128
; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:124
; GFX1170-NEXT: s_waitcnt vmcnt(24)
; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[48:49]
; GFX1170-NEXT: s_waitcnt vmcnt(22)
; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[50:51]
; GFX1170-NEXT: s_waitcnt vmcnt(20)
; GFX1170-NEXT: v_maximum_f64 v[8:9], v[8:9], v[52:53]
; GFX1170-NEXT: s_waitcnt vmcnt(18)
; GFX1170-NEXT: v_maximum_f64 v[10:11], v[10:11], v[54:55]
; GFX1170-NEXT: s_waitcnt vmcnt(16)
; GFX1170-NEXT: v_maximum_f64 v[12:13], v[12:13], v[64:65]
; GFX1170-NEXT: s_waitcnt vmcnt(14)
; GFX1170-NEXT: v_maximum_f64 v[14:15], v[14:15], v[66:67]
; GFX1170-NEXT: s_waitcnt vmcnt(12)
; GFX1170-NEXT: v_maximum_f64 v[16:17], v[16:17], v[68:69]
; GFX1170-NEXT: s_waitcnt vmcnt(10)
; GFX1170-NEXT: v_maximum_f64 v[18:19], v[18:19], v[70:71]
; GFX1170-NEXT: s_waitcnt vmcnt(8)
; GFX1170-NEXT: v_maximum_f64 v[20:21], v[20:21], v[80:81]
; GFX1170-NEXT: s_waitcnt vmcnt(6)
; GFX1170-NEXT: v_maximum_f64 v[22:23], v[22:23], v[82:83]
; GFX1170-NEXT: s_waitcnt vmcnt(5)
; GFX1170-NEXT: v_maximum_f64 v[24:25], v[24:25], v[38:39]
; GFX1170-NEXT: s_waitcnt vmcnt(3)
; GFX1170-NEXT: v_maximum_f64 v[26:27], v[26:27], v[32:33]
; GFX1170-NEXT: s_waitcnt vmcnt(2)
; GFX1170-NEXT: v_maximum_f64 v[28:29], v[28:29], v[36:37]
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[30:31], v[30:31], v[34:35]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2834,5 +3077,3 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
%op = call <16 x double> @llvm.maximum.v16f64(<16 x double> %src0, <16 x double> %src1)
ret <16 x double> %op
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}

View File

@ -1,14 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_minimum_f16(half %src0, half %src1) {
; GFX8-LABEL: v_minimum_f16:
@ -61,6 +62,18 @@ define half @v_minimum_f16(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_minimum_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_minimum_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimum_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -115,6 +128,18 @@ define half @v_minimum_f16__nnan(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -189,6 +214,18 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_minimum_f16__nsz:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_minimum_f16__nsz:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimum_f16__nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -243,6 +280,18 @@ define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_nsz:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_nsz:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -323,6 +372,22 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_src0:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_src0:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -408,6 +473,22 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_src1:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_src1:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -515,6 +596,28 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: s_minimum_f16:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1170-TRUE16-NEXT: ;;#ASMSTART
; GFX1170-TRUE16-NEXT: ; use v0
; GFX1170-TRUE16-NEXT: ;;#ASMEND
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: s_minimum_f16:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1170-FAKE16-NEXT: ;;#ASMSTART
; GFX1170-FAKE16-NEXT: ; use v0
; GFX1170-FAKE16-NEXT: ;;#ASMEND
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -612,6 +715,12 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -652,6 +761,12 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f16__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -739,6 +854,12 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f16__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -779,6 +900,12 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -900,6 +1027,15 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_minimum_v2f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s1
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v0
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1010,6 +1146,13 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1055,6 +1198,13 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f16__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1161,6 +1311,13 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f16__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1206,6 +1363,13 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f16__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1333,6 +1497,13 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1380,6 +1551,13 @@ define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f16__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1507,6 +1685,13 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f16__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1554,6 +1739,13 @@ define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f16__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1760,6 +1952,15 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v3, v9, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v8f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX1170-NEXT: v_pk_minimum_f16 v2, v2, v6
; GFX1170-NEXT: v_pk_minimum_f16 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2130,6 +2331,19 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v4, v4, v14, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v16f16:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v8
; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v9
; GFX1170-NEXT: v_pk_minimum_f16 v2, v2, v10
; GFX1170-NEXT: v_pk_minimum_f16 v3, v3, v11
; GFX1170-NEXT: v_pk_minimum_f16 v4, v4, v12
; GFX1170-NEXT: v_pk_minimum_f16 v5, v5, v13
; GFX1170-NEXT: v_pk_minimum_f16 v6, v6, v14
; GFX1170-NEXT: v_pk_minimum_f16 v7, v7, v15
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2149,5 +2363,3 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
%op = call <16 x half> @llvm.minimum.v16f16(<16 x half> %src0, <16 x half> %src1)
ret <16 x half> %op
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}

View File

@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define float @v_minimum_f32(float %src0, float %src1) {
; GFX7-LABEL: v_minimum_f32:
@ -59,6 +59,12 @@ define float @v_minimum_f32(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -103,6 +109,12 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -167,6 +179,12 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -211,6 +229,12 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -281,6 +305,14 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f32__nnan_src0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f32_e32 v0, 1.0, v0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f32__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -354,6 +386,14 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f32__nnan_src1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f32_e32 v1, 1.0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f32__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -443,6 +483,15 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_minimum_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, s0, s1
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v0
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -527,6 +576,13 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -576,6 +632,13 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -656,6 +719,13 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -705,6 +775,13 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -812,6 +889,16 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_minimum_v2f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v1, s1, s3
; GFX1170-NEXT: v_minimum_f32 v0, s0, s2
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v[0:1]
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -913,6 +1000,14 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -968,6 +1063,14 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1065,6 +1168,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1120,6 +1231,14 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1233,6 +1352,15 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1293,6 +1421,15 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f32__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1407,6 +1544,15 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f32__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1467,6 +1613,15 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1643,6 +1798,19 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v8f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v8
; GFX1170-NEXT: v_minimum_f32 v1, v1, v9
; GFX1170-NEXT: v_minimum_f32 v2, v2, v10
; GFX1170-NEXT: v_minimum_f32 v3, v3, v11
; GFX1170-NEXT: v_minimum_f32 v4, v4, v12
; GFX1170-NEXT: v_minimum_f32 v5, v5, v13
; GFX1170-NEXT: v_minimum_f32 v6, v6, v14
; GFX1170-NEXT: v_minimum_f32 v7, v7, v15
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1960,6 +2128,29 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v16f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: v_minimum_f32 v0, v0, v16
; GFX1170-NEXT: v_minimum_f32 v1, v1, v17
; GFX1170-NEXT: v_minimum_f32 v2, v2, v18
; GFX1170-NEXT: v_minimum_f32 v3, v3, v19
; GFX1170-NEXT: v_minimum_f32 v4, v4, v20
; GFX1170-NEXT: v_minimum_f32 v5, v5, v21
; GFX1170-NEXT: v_minimum_f32 v6, v6, v22
; GFX1170-NEXT: v_minimum_f32 v7, v7, v23
; GFX1170-NEXT: v_minimum_f32 v8, v8, v24
; GFX1170-NEXT: v_minimum_f32 v9, v9, v25
; GFX1170-NEXT: v_minimum_f32 v10, v10, v26
; GFX1170-NEXT: v_minimum_f32 v11, v11, v27
; GFX1170-NEXT: v_minimum_f32 v12, v12, v28
; GFX1170-NEXT: v_minimum_f32 v13, v13, v29
; GFX1170-NEXT: v_minimum_f32 v14, v14, v30
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v15, v15, v31
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1989,5 +2180,3 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
%op = call <16 x float> @llvm.minimum.v16f32(<16 x float> %src0, <16 x float> %src1)
ret <16 x float> %op
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}

View File

@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define double @v_minimum_f64(double %src0, double %src1) {
; GFX7-LABEL: v_minimum_f64:
@ -69,6 +69,12 @@ define double @v_minimum_f64(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -113,6 +119,12 @@ define double @v_minimum_f64__nnan(double %src0, double %src1) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -187,6 +199,12 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -231,6 +249,12 @@ define double @v_minimum_f64__nnan_nsz(double %src0, double %src1) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -312,6 +336,14 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f64__nnan_src0:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f64__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -396,6 +428,14 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_f64__nnan_src1:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f64__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -498,6 +538,15 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_minimum_f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v[0:1]
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -601,6 +650,13 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -651,6 +707,13 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -751,6 +814,13 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -801,6 +871,13 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -932,6 +1009,16 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: s_minimum_v2f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[2:3], s[2:3], s[18:19]
; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[16:17]
; GFX1170-NEXT: ;;#ASMSTART
; GFX1170-NEXT: ; use v[0:3]
; GFX1170-NEXT: ;;#ASMEND
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1060,6 +1147,14 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1116,6 +1211,14 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1241,6 +1344,14 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1297,6 +1408,14 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1447,6 +1566,15 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1509,6 +1637,15 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f64__nnan:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1660,6 +1797,15 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f64__nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1722,6 +1868,15 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1982,6 +2137,21 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v8f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[16:17]
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[18:19]
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[20:21]
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[22:23]
; GFX1170-NEXT: v_minimum_f64 v[8:9], v[8:9], v[24:25]
; GFX1170-NEXT: v_minimum_f64 v[10:11], v[10:11], v[26:27]
; GFX1170-NEXT: v_minimum_f64 v[12:13], v[12:13], v[28:29]
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[14:15], v[14:15], v[30:31]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2757,6 +2927,79 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_minimum_v16f64:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: s_clause 0x1b
; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:12
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: scratch_load_b32 v37, off, s32 offset:120
; GFX1170-NEXT: scratch_load_b32 v39, off, s32 offset:104
; GFX1170-NEXT: scratch_load_b32 v49, off, s32 offset:24
; GFX1170-NEXT: scratch_load_b32 v48, off, s32 offset:20
; GFX1170-NEXT: scratch_load_b32 v51, off, s32 offset:32
; GFX1170-NEXT: scratch_load_b32 v50, off, s32 offset:28
; GFX1170-NEXT: scratch_load_b32 v53, off, s32 offset:40
; GFX1170-NEXT: scratch_load_b32 v52, off, s32 offset:36
; GFX1170-NEXT: scratch_load_b32 v55, off, s32 offset:48
; GFX1170-NEXT: scratch_load_b32 v54, off, s32 offset:44
; GFX1170-NEXT: scratch_load_b32 v65, off, s32 offset:56
; GFX1170-NEXT: scratch_load_b32 v64, off, s32 offset:52
; GFX1170-NEXT: scratch_load_b32 v67, off, s32 offset:64
; GFX1170-NEXT: scratch_load_b32 v66, off, s32 offset:60
; GFX1170-NEXT: scratch_load_b32 v69, off, s32 offset:72
; GFX1170-NEXT: scratch_load_b32 v68, off, s32 offset:68
; GFX1170-NEXT: scratch_load_b32 v71, off, s32 offset:80
; GFX1170-NEXT: scratch_load_b32 v70, off, s32 offset:76
; GFX1170-NEXT: scratch_load_b32 v81, off, s32 offset:88
; GFX1170-NEXT: scratch_load_b32 v80, off, s32 offset:84
; GFX1170-NEXT: scratch_load_b32 v83, off, s32 offset:96
; GFX1170-NEXT: scratch_load_b32 v82, off, s32 offset:92
; GFX1170-NEXT: scratch_load_b32 v38, off, s32 offset:100
; GFX1170-NEXT: s_waitcnt vmcnt(26)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[32:33]
; GFX1170-NEXT: s_clause 0x2
; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:112
; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:108
; GFX1170-NEXT: scratch_load_b32 v36, off, s32 offset:116
; GFX1170-NEXT: s_waitcnt vmcnt(27)
; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[34:35]
; GFX1170-NEXT: s_clause 0x1
; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:128
; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:124
; GFX1170-NEXT: s_waitcnt vmcnt(24)
; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[48:49]
; GFX1170-NEXT: s_waitcnt vmcnt(22)
; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[50:51]
; GFX1170-NEXT: s_waitcnt vmcnt(20)
; GFX1170-NEXT: v_minimum_f64 v[8:9], v[8:9], v[52:53]
; GFX1170-NEXT: s_waitcnt vmcnt(18)
; GFX1170-NEXT: v_minimum_f64 v[10:11], v[10:11], v[54:55]
; GFX1170-NEXT: s_waitcnt vmcnt(16)
; GFX1170-NEXT: v_minimum_f64 v[12:13], v[12:13], v[64:65]
; GFX1170-NEXT: s_waitcnt vmcnt(14)
; GFX1170-NEXT: v_minimum_f64 v[14:15], v[14:15], v[66:67]
; GFX1170-NEXT: s_waitcnt vmcnt(12)
; GFX1170-NEXT: v_minimum_f64 v[16:17], v[16:17], v[68:69]
; GFX1170-NEXT: s_waitcnt vmcnt(10)
; GFX1170-NEXT: v_minimum_f64 v[18:19], v[18:19], v[70:71]
; GFX1170-NEXT: s_waitcnt vmcnt(8)
; GFX1170-NEXT: v_minimum_f64 v[20:21], v[20:21], v[80:81]
; GFX1170-NEXT: s_waitcnt vmcnt(6)
; GFX1170-NEXT: v_minimum_f64 v[22:23], v[22:23], v[82:83]
; GFX1170-NEXT: s_waitcnt vmcnt(5)
; GFX1170-NEXT: v_minimum_f64 v[24:25], v[24:25], v[38:39]
; GFX1170-NEXT: s_waitcnt vmcnt(3)
; GFX1170-NEXT: v_minimum_f64 v[26:27], v[26:27], v[32:33]
; GFX1170-NEXT: s_waitcnt vmcnt(2)
; GFX1170-NEXT: v_minimum_f64 v[28:29], v[28:29], v[36:37]
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[30:31], v[30:31], v[34:35]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2834,5 +3077,3 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
%op = call <16 x double> @llvm.minimum.v16f64(<16 x double> %src0, <16 x double> %src1)
ret <16 x double> %op
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-FAKE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-TRUE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
; llvm.maximum.f32(%a, %b) feeding llvm.minimum.f32(..., %c) on non-inreg
; arguments. Every check block below expects the pair to be emitted as one
; v_maximumminimum_f32 on v0, v1, v2.
define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) {
; GFX12-LABEL: test_minmax_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
; GFX12-NEXT: ; return to shader part epilog
; GCN-LABEL: test_minmax_f32:
; GCN: ; %bb.0:
; GCN-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%max = call float @llvm.maximum.f32(float %a, float %b)
%minmax = call float @llvm.minimum.f32(float %max, float %c)
ret float %minmax
}
; Same maximum -> minimum chain as test_minmax_f32, but the three operands are
; inreg arguments and the result is stored through %out. The check blocks for
; the gfx1200 runs expect scalar s_maximum_f32 followed by s_minimum_f32. The
; gfx1170 blocks expect vector instructions instead (v_maximum_f32 then
; v_minimum_f32 without -global-isel, a single v_maximumminimum_f32 with it).
define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_f32:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_maximum_f32 s0, s0, s1
; SDAG-NEXT: s_mov_b32 s5, s4
; SDAG-NEXT: s_mov_b32 s4, s3
; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; SDAG-NEXT: s_minimum_f32 s0, s0, s2
; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT: s_endpgm
; GFX1170-SDAG-LABEL: s_test_minmax_f32:
; GFX1170-SDAG: ; %bb.0:
; GFX1170-SDAG-NEXT: v_maximum_f32 v0, s0, s1
; GFX1170-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-SDAG-NEXT: s_mov_b32 s5, s4
; GFX1170-SDAG-NEXT: s_mov_b32 s4, s3
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, s2
; GFX1170-SDAG-NEXT: global_store_b32 v1, v0, s[4:5]
; GFX1170-SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: s_test_minmax_f32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_maximum_f32 s0, s0, s1
; GISEL-NEXT: s_mov_b32 s6, s3
; GISEL-NEXT: s_mov_b32 s7, s4
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_minimum_f32 s0, s0, s2
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GISEL-NEXT: v_mov_b32_e32 v0, s0
; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT: s_endpgm
; GFX1170-GISEL-LABEL: s_test_minmax_f32:
; GFX1170-GISEL: ; %bb.0:
; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1170-GISEL-NEXT: s_mov_b32 s6, s3
; GFX1170-GISEL-NEXT: s_mov_b32 s7, s4
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_maximumminimum_f32 v0, s0, s1, v0
; GFX1170-GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
; GFX1170-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: s_test_minmax_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_maximum_f32 s0, s0, s1
; GFX12-SDAG-NEXT: s_mov_b32 s5, s4
; GFX12-SDAG-NEXT: s_mov_b32 s4, s3
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-SDAG-NEXT: s_minimum_f32 s0, s0, s2
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: s_test_minmax_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_maximum_f32 s0, s0, s1
; GFX12-GISEL-NEXT: s_mov_b32 s6, s3
; GFX12-GISEL-NEXT: s_mov_b32 s7, s4
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT: s_minimum_f32 s0, s0, s2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
; GFX12-GISEL-NEXT: s_endpgm
%smax = call float @llvm.maximum.f32(float %a, float %b)
%sminmax = call float @llvm.minimum.f32(float %smax, float %c)
store float %sminmax, ptr addrspace(1) %out
@ -44,157 +69,230 @@ define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float i
}
; Variant of test_minmax_f32 with the llvm.minimum.f32 operands swapped, so the
; maximum result is the second operand. The check blocks expect the same single
; v_maximumminimum_f32 v0, v0, v1, v2.
define amdgpu_ps float @test_minmax_commuted_f32(float %a, float %b, float %c) {
; GFX12-LABEL: test_minmax_commuted_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
; GFX12-NEXT: ; return to shader part epilog
; GCN-LABEL: test_minmax_commuted_f32:
; GCN: ; %bb.0:
; GCN-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%max = call float @llvm.maximum.f32(float %a, float %b)
%minmax = call float @llvm.minimum.f32(float %c, float %max)
ret float %minmax
}
; llvm.minimum.f32(%a, %b) feeding llvm.maximum.f32(..., %c), the reverse
; nesting of test_minmax_f32. Every check block below expects a single
; v_minimummaximum_f32 on v0, v1, v2.
define amdgpu_ps float @test_maxmin_f32(float %a, float %b, float %c) {
; GFX12-LABEL: test_maxmin_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
; GFX12-NEXT: ; return to shader part epilog
; GCN-LABEL: test_maxmin_f32:
; GCN: ; %bb.0:
; GCN-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%min = call float @llvm.minimum.f32(float %a, float %b)
%maxmin = call float @llvm.maximum.f32(float %min, float %c)
ret float %maxmin
}
; Variant of test_maxmin_f32 with the llvm.maximum.f32 operands swapped, so the
; minimum result is the second operand. The check blocks expect the same single
; v_minimummaximum_f32 v0, v0, v1, v2.
define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) {
; GFX12-LABEL: test_maxmin_commuted_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
; GFX12-NEXT: ; return to shader part epilog
; GCN-LABEL: test_maxmin_commuted_f32:
; GCN: ; %bb.0:
; GCN-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%min = call float @llvm.minimum.f32(float %a, float %b)
%maxmin = call float @llvm.maximum.f32(float %c, float %min)
ret float %maxmin
}
; Half-precision maximum -> minimum chain on non-inreg arguments. All check
; blocks expect one v_maximumminimum_f16. The real-true16 blocks name the
; 16-bit register halves (v0.l, v1.l, v2.l) while the fake16 blocks name the
; whole registers (v0, v1, v2).
define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) {
; SDAG-TRUE16-LABEL: test_minmax_f16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; SDAG-TRUE16-NEXT: ; return to shader part epilog
; TRUE16-LABEL: test_minmax_f16:
; TRUE16: ; %bb.0:
; TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; TRUE16-NEXT: ; return to shader part epilog
;
; SDAG-FAKE16-LABEL: test_minmax_f16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; SDAG-FAKE16-NEXT: ; return to shader part epilog
; FAKE16-LABEL: test_minmax_f16:
; FAKE16: ; %bb.0:
; FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; FAKE16-NEXT: ; return to shader part epilog
;
; GISEL-TRUE16-LABEL: test_minmax_f16:
; GISEL-TRUE16: ; %bb.0:
; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GISEL-TRUE16-NEXT: ; return to shader part epilog
; GFX12-SDAG-TRUE16-LABEL: test_minmax_f16:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GISEL-FAKE16-LABEL: test_minmax_f16:
; GISEL-FAKE16: ; %bb.0:
; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; GISEL-FAKE16-NEXT: ; return to shader part epilog
; GFX12-SDAG-FAKE16-LABEL: test_minmax_f16:
; GFX12-SDAG-FAKE16: ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-TRUE16-LABEL: test_minmax_f16:
; GFX12-GISEL-TRUE16: ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-FAKE16-LABEL: test_minmax_f16:
; GFX12-GISEL-FAKE16: ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maximum.f16(half %a, half %b)
%minmax = call half @llvm.minimum.f16(half %max, half %c)
ret half %minmax
}
; Variant of test_minmax_f16 with the llvm.minimum.f16 operands swapped, so the
; maximum result is the second operand. All check blocks still expect one
; v_maximumminimum_f16, using .l register halves in the real-true16 blocks and
; whole registers in the fake16 blocks.
define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) {
; SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; SDAG-TRUE16-NEXT: ; return to shader part epilog
; TRUE16-LABEL: test_minmax_commuted_f16:
; TRUE16: ; %bb.0:
; TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; TRUE16-NEXT: ; return to shader part epilog
;
; SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; SDAG-FAKE16-NEXT: ; return to shader part epilog
; FAKE16-LABEL: test_minmax_commuted_f16:
; FAKE16: ; %bb.0:
; FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; FAKE16-NEXT: ; return to shader part epilog
;
; GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
; GISEL-TRUE16: ; %bb.0:
; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GISEL-TRUE16-NEXT: ; return to shader part epilog
; GFX12-SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
; GISEL-FAKE16: ; %bb.0:
; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; GISEL-FAKE16-NEXT: ; return to shader part epilog
; GFX12-SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
; GFX12-SDAG-FAKE16: ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
; GFX12-GISEL-TRUE16: ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
; GFX12-GISEL-FAKE16: ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maximum.f16(half %a, half %b)
%minmax = call half @llvm.minimum.f16(half %c, half %max)
ret half %minmax
}
; Half-precision minimum -> maximum chain where the minimum result is the
; second operand of llvm.maximum.f16. All check blocks expect one
; v_minimummaximum_f16, using .l register halves in the real-true16 blocks and
; whole registers in the fake16 blocks.
define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) {
; SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; SDAG-TRUE16-NEXT: ; return to shader part epilog
; TRUE16-LABEL: test_maxmin_commuted_f16:
; TRUE16: ; %bb.0:
; TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; TRUE16-NEXT: ; return to shader part epilog
;
; SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
; SDAG-FAKE16-NEXT: ; return to shader part epilog
; FAKE16-LABEL: test_maxmin_commuted_f16:
; FAKE16: ; %bb.0:
; FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
; FAKE16-NEXT: ; return to shader part epilog
;
; GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
; GISEL-TRUE16: ; %bb.0:
; GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; GISEL-TRUE16-NEXT: ; return to shader part epilog
; GFX12-SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
;
; GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
; GISEL-FAKE16: ; %bb.0:
; GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
; GISEL-FAKE16-NEXT: ; return to shader part epilog
; GFX12-SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
; GFX12-SDAG-FAKE16: ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
; GFX12-GISEL-TRUE16: ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
; GFX12-GISEL-FAKE16: ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
%min = call half @llvm.minimum.f16(half %a, half %b)
%maxmin = call half @llvm.maximum.f16(half %c, half %min)
ret half %maxmin
}
define amdgpu_ps void @s_test_minmax_f16(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-TRUE16-LABEL: s_test_minmax_f16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_mov_b32 s5, s4
; SDAG-TRUE16-NEXT: s_mov_b32 s4, s3
; SDAG-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
; SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
; SDAG-TRUE16-NEXT: s_endpgm
; GFX1170-SDAG-TRUE16-LABEL: s_test_minmax_f16:
; GFX1170-SDAG-TRUE16: ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-SDAG-TRUE16-NEXT: s_mov_b32 s5, s4
; GFX1170-SDAG-TRUE16-NEXT: s_mov_b32 s4, s3
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, s2
; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
; GFX1170-SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: s_test_minmax_f16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
; SDAG-FAKE16-NEXT: s_mov_b32 s5, s4
; SDAG-FAKE16-NEXT: s_mov_b32 s4, s3
; SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; SDAG-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[4:5]
; SDAG-FAKE16-NEXT: s_endpgm
; GFX1170-SDAG-FAKE16-LABEL: s_test_minmax_f16:
; GFX1170-SDAG-FAKE16: ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s5, s4
; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s4, s3
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, s2
; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5]
; GFX1170-SDAG-FAKE16-NEXT: s_endpgm
;
; GISEL-TRUE16-LABEL: s_test_minmax_f16:
; GISEL-TRUE16: ; %bb.0:
; GISEL-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
; GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-TRUE16-NEXT: s_mov_b32 s6, s3
; GISEL-TRUE16-NEXT: s_mov_b32 s7, s4
; GISEL-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
; GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-TRUE16-NEXT: s_endpgm
; GFX1170-GISEL-TRUE16-LABEL: s_test_minmax_f16:
; GFX1170-GISEL-TRUE16: ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX1170-GISEL-TRUE16-NEXT: s_mov_b32 s6, s3
; GFX1170-GISEL-TRUE16-NEXT: s_mov_b32 s7, s4
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, s0, s1, v0.l
; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GFX1170-GISEL-TRUE16-NEXT: s_endpgm
;
; GISEL-FAKE16-LABEL: s_test_minmax_f16:
; GISEL-FAKE16: ; %bb.0:
; GISEL-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
; GISEL-FAKE16-NEXT: s_mov_b32 s6, s3
; GISEL-FAKE16-NEXT: s_mov_b32 s7, s4
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
; GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-FAKE16-NEXT: s_endpgm
; GFX1170-GISEL-FAKE16-LABEL: s_test_minmax_f16:
; GFX1170-GISEL-FAKE16: ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1170-GISEL-FAKE16-NEXT: s_mov_b32 s6, s3
; GFX1170-GISEL-FAKE16-NEXT: s_mov_b32 s7, s4
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, s0, s1, v0
; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GFX1170-GISEL-FAKE16-NEXT: s_endpgm
;
; GFX12-SDAG-TRUE16-LABEL: s_test_minmax_f16:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX12-SDAG-TRUE16-NEXT: s_mov_b32 s5, s4
; GFX12-SDAG-TRUE16-NEXT: s_mov_b32 s4, s3
; GFX12-SDAG-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GFX12-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
; GFX12-SDAG-TRUE16-NEXT: s_endpgm
;
; GFX12-SDAG-FAKE16-LABEL: s_test_minmax_f16:
; GFX12-SDAG-FAKE16: ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
; GFX12-SDAG-FAKE16-NEXT: s_mov_b32 s5, s4
; GFX12-SDAG-FAKE16-NEXT: s_mov_b32 s4, s3
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-SDAG-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
; GFX12-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX12-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[4:5]
; GFX12-SDAG-FAKE16-NEXT: s_endpgm
;
; GFX12-GISEL-TRUE16-LABEL: s_test_minmax_f16:
; GFX12-GISEL-TRUE16: ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
; GFX12-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX12-GISEL-TRUE16-NEXT: s_mov_b32 s6, s3
; GFX12-GISEL-TRUE16-NEXT: s_mov_b32 s7, s4
; GFX12-GISEL-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GFX12-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GFX12-GISEL-TRUE16-NEXT: s_endpgm
;
; GFX12-GISEL-FAKE16-LABEL: s_test_minmax_f16:
; GFX12-GISEL-FAKE16: ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
; GFX12-GISEL-FAKE16-NEXT: s_mov_b32 s6, s3
; GFX12-GISEL-FAKE16-NEXT: s_mov_b32 s7, s4
; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; GFX12-GISEL-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
; GFX12-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GFX12-GISEL-FAKE16-NEXT: s_endpgm
%smax = call half @llvm.maximum.f16(half %a, half %b)
%sminmax = call half @llvm.minimum.f16(half %smax, half %c)
store half %sminmax, ptr addrspace(1) %out

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11,SDAG-GFX11-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,SDAG,SDAG-GFX1170,SDAG-GFX1170-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,SDAG,SDAG-GFX1170,SDAG-GFX1170-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,GISEL,GISEL-GFX1170,GISEL-GFX1170-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,GISEL,GISEL-GFX1170,GISEL-GFX1170-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12,GISEL-GFX12-TRUE16 %s
@ -19,6 +23,12 @@ define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_minmax_i32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -61,6 +71,26 @@ define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX1170-LABEL: s_test_minmax_i32:
; SDAG-GFX1170: ; %bb.0:
; SDAG-GFX1170-NEXT: s_max_i32 s0, s0, s1
; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1170-NEXT: s_min_i32 s0, s0, s2
; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1170-NEXT: global_store_b32 v0, v1, s[4:5]
; SDAG-GFX1170-NEXT: s_endpgm
;
; GISEL-GFX1170-LABEL: s_test_minmax_i32:
; GISEL-GFX1170: ; %bb.0:
; GISEL-GFX1170-NEXT: s_max_i32 s0, s0, s1
; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1170-NEXT: s_min_i32 s0, s0, s2
; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4
; GISEL-GFX1170-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX1170-NEXT: s_endpgm
;
; SDAG-GFX12-LABEL: s_test_minmax_i32:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_max_i32 s0, s0, s1
@ -115,6 +145,12 @@ define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_minmax_commuted_i32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_commuted_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -143,6 +179,12 @@ define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_maxmin_i32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -171,6 +213,12 @@ define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_maxmin_commuted_i32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_commuted_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -200,6 +248,13 @@ define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_smed3_i32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_i32 v2, v2, v3, v4
; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_smed3_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -233,6 +288,12 @@ define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_minmax_u32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -275,6 +336,26 @@ define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX1170-LABEL: s_test_minmax_u32:
; SDAG-GFX1170: ; %bb.0:
; SDAG-GFX1170-NEXT: s_max_u32 s0, s0, s1
; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1170-NEXT: s_min_u32 s0, s0, s2
; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1170-NEXT: global_store_b32 v0, v1, s[4:5]
; SDAG-GFX1170-NEXT: s_endpgm
;
; GISEL-GFX1170-LABEL: s_test_minmax_u32:
; GISEL-GFX1170: ; %bb.0:
; GISEL-GFX1170-NEXT: s_max_u32 s0, s0, s1
; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1170-NEXT: s_min_u32 s0, s0, s2
; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4
; GISEL-GFX1170-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX1170-NEXT: s_endpgm
;
; SDAG-GFX12-LABEL: s_test_minmax_u32:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_max_u32 s0, s0, s1
@ -329,6 +410,12 @@ define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_minmax_commuted_u32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_commuted_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -357,6 +444,12 @@ define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_maxmin_u32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -385,6 +478,12 @@ define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_maxmin_commuted_u32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_commuted_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -414,6 +513,13 @@ define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_umed3_i32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_u32 v2, v2, v3, v4
; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_umed3_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -457,6 +563,22 @@ define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
; GISEL-GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-LABEL: test_minmax_f32_ieee_true:
; SDAG-GFX1170: ; %bb.0:
; SDAG-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; SDAG-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
; SDAG-GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
; SDAG-GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-LABEL: test_minmax_f32_ieee_true:
; GISEL-GFX1170: ; %bb.0:
; GISEL-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GISEL-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
; GISEL-GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
; GISEL-GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: test_minmax_f32_ieee_true:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -522,6 +644,26 @@ define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX1170-LABEL: s_test_minmax_f32_ieee_false:
; SDAG-GFX1170: ; %bb.0:
; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1170-NEXT: v_maxmin_num_f32 v0, s0, s1, v0
; SDAG-GFX1170-NEXT: global_store_b32 v1, v0, s[4:5]
; SDAG-GFX1170-NEXT: s_endpgm
;
; GISEL-GFX1170-LABEL: s_test_minmax_f32_ieee_false:
; GISEL-GFX1170: ; %bb.0:
; GISEL-GFX1170-NEXT: s_max_f32 s0, s0, s1
; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4
; GISEL-GFX1170-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1170-NEXT: s_min_f32 s0, s0, s2
; GISEL-GFX1170-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX1170-NEXT: s_endpgm
;
; SDAG-GFX12-LABEL: s_test_minmax_f32_ieee_false:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
@ -575,6 +717,11 @@ define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b,
; GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
@ -607,6 +754,22 @@ define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
; GISEL-GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-LABEL: test_maxmin_f32_ieee_true:
; SDAG-GFX1170: ; %bb.0:
; SDAG-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; SDAG-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
; SDAG-GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2
; SDAG-GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-LABEL: test_maxmin_f32_ieee_true:
; GISEL-GFX1170: ; %bb.0:
; GISEL-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GISEL-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
; GISEL-GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2
; GISEL-GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: test_maxmin_f32_ieee_true:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -659,6 +822,11 @@ define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b,
; GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
;
; GFX1170-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2
; GFX1170-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minmax_num_f32 v0, v0, v1, v2
@ -682,6 +850,13 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z)
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_med3_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v2, v2, v3, v4
; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_med3_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -716,6 +891,13 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_med3_minimumnum_maximumnum_f32:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_med3_num_f32 v2, v2, v3, v4
; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_med3_minimumnum_maximumnum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -763,6 +945,26 @@ define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog
;
; SDAG-GFX1170-TRUE16-LABEL: test_minmax_f16_ieee_false:
; SDAG-GFX1170-TRUE16: ; %bb.0:
; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
; SDAG-GFX1170-TRUE16-NEXT: ; return to shader part epilog
;
; SDAG-GFX1170-FAKE16-LABEL: test_minmax_f16_ieee_false:
; SDAG-GFX1170-FAKE16: ; %bb.0:
; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
; SDAG-GFX1170-FAKE16-NEXT: ; return to shader part epilog
;
; GISEL-GFX1170-TRUE16-LABEL: test_minmax_f16_ieee_false:
; GISEL-GFX1170-TRUE16: ; %bb.0:
; GISEL-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
; GISEL-GFX1170-TRUE16-NEXT: ; return to shader part epilog
;
; GISEL-GFX1170-FAKE16-LABEL: test_minmax_f16_ieee_false:
; GISEL-GFX1170-FAKE16: ; %bb.0:
; GISEL-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
; GISEL-GFX1170-FAKE16-NEXT: ; return to shader part epilog
;
; SDAG-GFX12-TRUE16-LABEL: test_minmax_f16_ieee_false:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
@ -850,6 +1052,47 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-GFX11-FAKE16-NEXT: s_endpgm
;
; SDAG-GFX1170-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
; SDAG-GFX1170-TRUE16: ; %bb.0:
; SDAG-GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; SDAG-GFX1170-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-GFX1170-TRUE16-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1170-TRUE16-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l
; SDAG-GFX1170-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
; SDAG-GFX1170-TRUE16-NEXT: s_endpgm
;
; SDAG-GFX1170-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
; SDAG-GFX1170-FAKE16: ; %bb.0:
; SDAG-GFX1170-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-GFX1170-FAKE16-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1170-FAKE16-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, s0, s1, v0
; SDAG-GFX1170-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5]
; SDAG-GFX1170-FAKE16-NEXT: s_endpgm
;
; GISEL-GFX1170-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX1170-TRUE16: ; %bb.0:
; GISEL-GFX1170-TRUE16-NEXT: s_max_f16 s0, s0, s1
; GISEL-GFX1170-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1170-TRUE16-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1170-TRUE16-NEXT: s_mov_b32 s7, s4
; GISEL-GFX1170-TRUE16-NEXT: s_min_f16 s0, s0, s2
; GISEL-GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
; GISEL-GFX1170-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-GFX1170-TRUE16-NEXT: s_endpgm
;
; GISEL-GFX1170-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX1170-FAKE16: ; %bb.0:
; GISEL-GFX1170-FAKE16-NEXT: s_max_f16 s0, s0, s1
; GISEL-GFX1170-FAKE16-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1170-FAKE16-NEXT: s_mov_b32 s7, s4
; GISEL-GFX1170-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1170-FAKE16-NEXT: s_min_f16 s0, s0, s2
; GISEL-GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX1170-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-GFX1170-FAKE16-NEXT: s_endpgm
;
; SDAG-GFX12-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
@ -978,6 +1221,42 @@ define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG-GFX1170-TRUE16: ; %bb.0:
; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l
; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG-GFX1170-FAKE16: ; %bb.0:
; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL-GFX1170-TRUE16: ; %bb.0:
; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
; GISEL-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l
; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL-GFX1170-FAKE16: ; %bb.0:
; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GISEL-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1095,6 +1374,26 @@ define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog
;
; SDAG-GFX1170-TRUE16-LABEL: test_maxmin_f16_ieee_false:
; SDAG-GFX1170-TRUE16: ; %bb.0:
; SDAG-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
; SDAG-GFX1170-TRUE16-NEXT: ; return to shader part epilog
;
; SDAG-GFX1170-FAKE16-LABEL: test_maxmin_f16_ieee_false:
; SDAG-GFX1170-FAKE16: ; %bb.0:
; SDAG-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
; SDAG-GFX1170-FAKE16-NEXT: ; return to shader part epilog
;
; GISEL-GFX1170-TRUE16-LABEL: test_maxmin_f16_ieee_false:
; GISEL-GFX1170-TRUE16: ; %bb.0:
; GISEL-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
; GISEL-GFX1170-TRUE16-NEXT: ; return to shader part epilog
;
; GISEL-GFX1170-FAKE16-LABEL: test_maxmin_f16_ieee_false:
; GISEL-GFX1170-FAKE16: ; %bb.0:
; GISEL-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
; GISEL-GFX1170-FAKE16-NEXT: ; return to shader part epilog
;
; SDAG-GFX12-TRUE16-LABEL: test_maxmin_f16_ieee_false:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
@ -1180,6 +1479,42 @@ define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG-GFX1170-TRUE16: ; %bb.0:
; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
; SDAG-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l
; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG-GFX1170-FAKE16: ; %bb.0:
; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; SDAG-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL-GFX1170-TRUE16: ; %bb.0:
; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
; GISEL-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l
; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL-GFX1170-FAKE16: ; %bb.0:
; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GISEL-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1305,6 +1640,34 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-TRUE16-LABEL: test_med3_f16:
; SDAG-GFX1170-TRUE16: ; %bb.0:
; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
; SDAG-GFX1170-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1170-FAKE16-LABEL: test_med3_f16:
; SDAG-GFX1170-FAKE16: ; %bb.0:
; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1170-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
; SDAG-GFX1170-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-TRUE16-LABEL: test_med3_f16:
; GISEL-GFX1170-TRUE16: ; %bb.0:
; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
; GISEL-GFX1170-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX1170-FAKE16-LABEL: test_med3_f16:
; GISEL-GFX1170-FAKE16: ; %bb.0:
; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX1170-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
; GISEL-GFX1170-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-TRUE16-LABEL: test_med3_f16:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
@ -23,6 +25,13 @@ define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_safe:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -53,6 +62,13 @@ define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -83,6 +99,13 @@ define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -112,6 +135,12 @@ define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -140,6 +169,13 @@ define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_safe:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -170,6 +206,13 @@ define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -200,6 +243,13 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -229,6 +279,12 @@ define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -260,6 +316,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -296,6 +361,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -332,6 +406,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x f
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -366,6 +449,12 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -397,6 +486,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -433,6 +531,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -469,6 +576,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x f
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -503,6 +619,12 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -534,6 +656,20 @@ define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_safe:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_safe:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -579,6 +715,20 @@ define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -624,6 +774,20 @@ define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -668,6 +832,18 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -709,6 +885,20 @@ define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_safe:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_safe:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -754,6 +944,20 @@ define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -799,6 +1003,20 @@ define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -843,6 +1061,18 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -899,6 +1129,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -971,6 +1224,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x ha
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1043,6 +1319,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x hal
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1107,6 +1406,12 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1153,6 +1458,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1225,6 +1553,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x ha
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1297,6 +1648,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x hal
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1361,6 +1735,12 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1427,6 +1807,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> %
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1531,6 +1947,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x ha
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1635,6 +2087,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x hal
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l
; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1724,6 +2212,13 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1791,6 +2286,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> %
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1895,6 +2426,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x ha
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1999,6 +2566,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x hal
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
; GFX1170-TRUE16: ; %bb.0:
; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l
; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
; GFX1170-FAKE16: ; %bb.0:
; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2088,6 +2691,13 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2
; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2120,6 +2730,14 @@ define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2155,6 +2773,14 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
; GFX1170: ; %bb.0:
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -11,6 +11,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@ -123,6 +127,44 @@ define half @test_vector_reduce_fmax_v2half(<2 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v2half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v2half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v2half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v2half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -307,6 +349,54 @@ define half @test_vector_reduce_fmax_v3half(<3 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v3half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7e00
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v3half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0x7e00
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v3half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v3half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -535,6 +625,59 @@ define half @test_vector_reduce_fmax_v4half(<4 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v4half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v4half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v4half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v4half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -871,6 +1014,86 @@ define half @test_vector_reduce_fmax_v8half(<8 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v8half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v2
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v8half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v8half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v3.l, v3.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v3.h, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v8half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v4
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v6
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v7
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1438,6 +1661,138 @@ define half @test_vector_reduce_fmax_v16half(<16 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v16half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v1.l, v1.h
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v2.l, v2.h
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v3.l, v3.h
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v4.l, v4.h
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v5.l, v5.h
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v6.l, v6.h
; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v7.l, v7.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v16half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v8
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v9
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v2, v8
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v3, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v5
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v4, v2
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v5, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v7
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v6, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v7, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v16half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v3.h, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v4.l, v4.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v4.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.l, v5.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.h, v5.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v6.l, v6.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v6.h, v6.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.h, v7.l, v7.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v6.l, v7.h, v7.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v2.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.h, v3.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v3.h, v4.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v5.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.h, v6.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v16half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v9, v9
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v10, v10
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v8
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v9
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v10
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v11, v11
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v12, v12
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v13, v13
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v11, v14, v14
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v12, v15, v15
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v8
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v9
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v10
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v11
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v12
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v4, v5
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v6, v7
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1671,6 +2026,22 @@ define float @test_vector_reduce_fmax_v2float(<2 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v2float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v2float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v2float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1779,6 +2150,21 @@ define float @test_vector_reduce_fmax_v3float(<3 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v3float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v1, v2
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v3float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v3float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1910,6 +2296,25 @@ define float @test_vector_reduce_fmax_v4float(<4 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v4float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v4float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v4float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2093,6 +2498,33 @@ define float @test_vector_reduce_fmax_v8float(<8 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v8float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v4, v5
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v6, v7
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v8float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v4, v5 :: v_dual_max_num_f32 v3, v6, v7
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v8float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2382,6 +2814,51 @@ define float @test_vector_reduce_fmax_v16float(<16 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v16float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v4, v5
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v6, v7
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v8, v9
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v10, v11
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v12, v13
; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v14, v15
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v16float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v6 :: v_dual_max_num_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v7, v7 :: v_dual_max_num_f32 v5, v8, v8
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v9, v9 :: v_dual_max_num_f32 v7, v10, v10
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v8, v11, v11 :: v_dual_max_num_f32 v9, v12, v12
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v10, v13, v13 :: v_dual_max_num_f32 v11, v14, v14
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v12, v15, v15
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v3, v4 :: v_dual_max_num_f32 v4, v5, v6
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v5, v7, v8 :: v_dual_max_num_f32 v6, v9, v10
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v7, v11, v12 :: v_dual_max_num_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v2, v3 :: v_dual_max_num_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v7 :: v_dual_max_num_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v1, v2, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v16float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2523,6 +3000,24 @@ define double @test_vector_reduce_fmax_v2double(<2 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v2double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v2double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v2double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2656,6 +3151,28 @@ define double @test_vector_reduce_fmax_v3double(<3 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v3double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v3double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v3double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2815,6 +3332,34 @@ define double @test_vector_reduce_fmax_v4double(<4 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v4double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v4double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v4double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -3062,6 +3607,52 @@ define double @test_vector_reduce_fmax_v8double(<8 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v8double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v8double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v8double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -3511,6 +4102,92 @@ define double @test_vector_reduce_fmax_v16double(<16 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v16double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[16:17], v[16:17]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[18:19], v[18:19]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[20:21], v[20:21]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[22:23], v[22:23]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[24:25], v[24:25]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[26:27], v[26:27]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[28:29], v[28:29]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[30:31], v[30:31]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v16double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[16:17], v[16:17], v[16:17]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[18:19], v[18:19], v[18:19]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[20:21], v[20:21], v[20:21]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[22:23], v[22:23], v[22:23]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[24:25], v[24:25], v[24:25]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[26:27], v[26:27], v[26:27]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[28:29], v[28:29], v[28:29]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[16:17], v[18:19]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[20:21], v[22:23]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[24:25], v[26:27]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[30:31], v[30:31], v[30:31]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[28:29], v[30:31]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v16double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -3627,6 +4304,7 @@ declare double @llvm.vector.reduce.fmax.v16double(<16 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
; GFX1170: {{.*}}
; GFX12: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}

View File

@ -5,6 +5,8 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
@ -69,6 +71,20 @@ define half @test_vector_reduce_fmaximum_v2half(<2 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v2half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -180,6 +196,27 @@ define half @test_vector_reduce_fmaximum_v3half(<3 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0xfc00
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v3half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s0, 0xfc00
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_perm_b32 v1, s0, v1, 0x5040100
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -326,6 +363,23 @@ define half @test_vector_reduce_fmaximum_v4half(<4 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v4half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -560,6 +614,28 @@ define half @test_vector_reduce_fmaximum_v8half(<8 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v8half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1002,6 +1078,38 @@ define half @test_vector_reduce_fmaximum_v16half(<16 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v3, v3, v7
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v2, v2, v6
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v4
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v16half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v3, v3, v7
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v2, v2, v6
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v4
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1092,6 +1200,12 @@ define float @test_vector_reduce_fmaximum_v2float(<2 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v2float:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v2float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1169,6 +1283,12 @@ define float @test_vector_reduce_fmaximum_v3float(<3 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v3float:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, v2
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v3float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1263,6 +1383,14 @@ define float @test_vector_reduce_fmaximum_v4float(<4 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v4float:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v4float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1427,6 +1555,17 @@ define float @test_vector_reduce_fmaximum_v8float(<8 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v8float:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v4, v5
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v6, v7
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v8float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1730,6 +1869,23 @@ define float @test_vector_reduce_fmaximum_v16float(<16 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v16float:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v4, v5
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v6, v7
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v8, v9
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v10, v11
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v12, v13
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum3_f32 v0, v0, v14, v15
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v16float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1807,6 +1963,12 @@ define double @test_vector_reduce_fmaximum_v2double(<2 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v2double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v2double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1895,6 +2057,14 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v3double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v3double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2007,6 +2177,15 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v4double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v4double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2210,6 +2389,21 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v8double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[10:11]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[12:13]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[14:15]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v8double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2609,6 +2803,35 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fmaximum_v16double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[10:11]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[12:13]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[14:15]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[18:19]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[20:21]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[22:23]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[24:25]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[26:27]
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[28:29]
; GFX1170-NEXT: s_waitcnt vmcnt(0)
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[30:31]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fmaximum_v16double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2664,6 +2887,7 @@ declare double @llvm.vector.reduce.fmaximum.v16double(<16 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10-SDAG: {{.*}}
; GFX11-SDAG: {{.*}}
; GFX1170-SDAG: {{.*}}
; GFX12-SDAG: {{.*}}
; GFX7-SDAG: {{.*}}
; GFX8-SDAG: {{.*}}

View File

@ -11,6 +11,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@ -123,6 +127,44 @@ define half @test_vector_reduce_fmin_v2half(<2 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v2half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v2half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v2half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v2half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -307,6 +349,54 @@ define half @test_vector_reduce_fmin_v3half(<3 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v3half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7e00
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v3half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0x7e00
; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v3half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v3half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -535,6 +625,59 @@ define half @test_vector_reduce_fmin_v4half(<4 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v4half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v4half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v4half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v4half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -871,6 +1014,86 @@ define half @test_vector_reduce_fmin_v8half(<8 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v8half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v8half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v8half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v3.l, v3.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v3.h, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v8half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v4
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v5
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v2, v6
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v3, v7
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1438,6 +1661,138 @@ define half @test_vector_reduce_fmin_v16half(<16 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v16half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v1.l, v1.h
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v2.l, v2.h
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v3.l, v3.h
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v4.l, v4.h
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v5.l, v5.h
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v6.l, v6.h
; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v7.l, v7.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v16half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v8
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v9
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v2, v8
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v3, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v5
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v4, v2
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v5, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v7
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v6, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v7, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v16half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v3.h, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v4.l, v4.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v4.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.l, v5.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.h, v5.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v6.l, v6.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v6.h, v6.h
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.h, v7.l, v7.l
; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v6.l, v7.h, v7.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v1.h, v2.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v2.l, v2.h, v3.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v2.h, v3.h, v4.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v3.l, v4.h, v5.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v3.h, v5.h, v6.l
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v16half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v9, v9
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v10, v10
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v8
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v9
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v2, v10
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v11, v11
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v12, v12
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v13, v13
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v11, v14, v14
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v12, v15, v15
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v3, v8
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v4, v4, v9
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v5, v5, v10
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v6, v6, v11
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v7, v7, v12
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v4, v5
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v6, v7
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1671,6 +2026,22 @@ define float @test_vector_reduce_fmin_v2float(<2 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v2float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v2float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v2float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1779,6 +2150,21 @@ define float @test_vector_reduce_fmin_v3float(<3 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v3float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v1, v2
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v3float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2
; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v3float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1910,6 +2296,25 @@ define float @test_vector_reduce_fmin_v4float(<4 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v4float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v4float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v4float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2093,6 +2498,33 @@ define float @test_vector_reduce_fmin_v8float(<8 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v8float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v4, v5
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v6, v7
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v8float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v2, v4, v5 :: v_dual_min_num_f32 v3, v6, v7
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v8float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2382,6 +2814,51 @@ define float @test_vector_reduce_fmin_v16float(<16 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v16float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v4, v5
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v6, v7
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v8, v9
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v10, v11
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v12, v13
; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v14, v15
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v16float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v6 :: v_dual_min_num_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v7, v7 :: v_dual_max_num_f32 v5, v8, v8
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v9, v9 :: v_dual_max_num_f32 v7, v10, v10
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v8, v11, v11 :: v_dual_max_num_f32 v9, v12, v12
; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v10, v13, v13 :: v_dual_max_num_f32 v11, v14, v14
; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v12, v15, v15
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v3, v3, v4 :: v_dual_min_num_f32 v4, v5, v6
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v5, v7, v8 :: v_dual_min_num_f32 v6, v9, v10
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v7, v11, v12 :: v_dual_min_num_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v1, v2, v3 :: v_dual_min_num_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v3, v6, v7 :: v_dual_min_num_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v1, v2, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v16float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2522,6 +2999,24 @@ define double @test_vector_reduce_fmin_v2double(<2 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v2double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v2double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v2double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2655,6 +3150,28 @@ define double @test_vector_reduce_fmin_v3double(<3 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v3double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v3double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v3double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2814,6 +3331,34 @@ define double @test_vector_reduce_fmin_v4double(<4 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v4double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v4double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v4double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -3061,6 +3606,52 @@ define double @test_vector_reduce_fmin_v8double(<8 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v8double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v8double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v8double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -3510,6 +4101,92 @@ define double @test_vector_reduce_fmin_v16double(<16 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v16double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[16:17], v[16:17]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[18:19], v[18:19]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[20:21], v[20:21]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[22:23], v[22:23]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[24:25], v[24:25]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[26:27], v[26:27]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[28:29], v[28:29]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[30:31], v[30:31]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v16double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[16:17], v[16:17], v[16:17]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[18:19], v[18:19], v[18:19]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[20:21], v[20:21], v[20:21]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[22:23], v[22:23], v[22:23]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[24:25], v[24:25], v[24:25]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[26:27], v[26:27], v[26:27]
; GFX1170-GISEL-NEXT: v_max_num_f64 v[28:29], v[28:29], v[28:29]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[8:9], v[16:17], v[18:19]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[10:11], v[20:21], v[22:23]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[12:13], v[24:25], v[26:27]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-NEXT: v_max_num_f64 v[30:31], v[30:31], v[30:31]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[14:15], v[28:29], v[30:31]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v16double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -3626,6 +4303,7 @@ declare double @llvm.vector.reduce.fmin.v16double(<16 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
; GFX1170: {{.*}}
; GFX12: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}

View File

@ -5,6 +5,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@ -71,6 +75,34 @@ define half @test_vector_reduce_fminimum_v2half(<2 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v2half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v2half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -204,6 +236,44 @@ define half @test_vector_reduce_fminimum_v3half(<3 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7c00
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v3half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x7c00
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_perm_b32 v1, s0, v1, 0x5040100
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v3half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -375,6 +445,44 @@ define half @test_vector_reduce_fminimum_v4half(<4 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v4half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v4half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v2
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -638,6 +746,62 @@ define half @test_vector_reduce_fminimum_v8half(<8 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v8half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v8half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v4
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v5
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v2, v6
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v3, v7
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1122,6 +1286,90 @@ define half @test_vector_reduce_fminimum_v16half(<16 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v3, v3, v7
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v2, v2, v6
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v16half:
; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v3, v3, v7
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v2, v2, v6
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v4
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v2.l, v4.l, v4.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v2.h, v5.l, v5.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v3.l, v6.l, v6.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v3.h, v7.l, v7.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v16half:
; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6
; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v8
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v9
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v2, v10
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v3, v11
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v4, v4, v12
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v5, v5, v13
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v6, v6, v14
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v7, v7, v15
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v4, v5
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v6, v7
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3
; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1272,6 +1520,12 @@ define float @test_vector_reduce_fminimum_v2float(<2 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fminimum_v2float:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fminimum_v2float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1349,6 +1603,20 @@ define float @test_vector_reduce_fminimum_v3float(<3 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v3float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v1, v2
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v3float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v2
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v3float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1455,6 +1723,23 @@ define float @test_vector_reduce_fminimum_v4float(<4 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v4float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v4float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1632,6 +1917,31 @@ define float @test_vector_reduce_fminimum_v8float(<8 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v8float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v4, v5
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v6, v7
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v8float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1953,6 +2263,45 @@ define float @test_vector_reduce_fminimum_v16float(<16 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v16float:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v4, v5
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v6, v7
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v8, v9
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v10, v11
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v12, v13
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v14, v15
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v16float:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7
; GFX1170-GISEL-NEXT: v_minimum_f32 v4, v8, v9
; GFX1170-GISEL-NEXT: v_minimum_f32 v5, v10, v11
; GFX1170-GISEL-NEXT: v_minimum_f32 v6, v12, v13
; GFX1170-GISEL-NEXT: v_minimum_f32 v7, v14, v15
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2056,6 +2405,12 @@ define double @test_vector_reduce_fminimum_v2double(<2 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fminimum_v2double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fminimum_v2double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2144,6 +2499,14 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-LABEL: test_vector_reduce_fminimum_v3double:
; GFX1170: ; %bb.0: ; %entry
; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_vector_reduce_fminimum_v3double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2256,6 +2619,24 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v4double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v4double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2472,6 +2853,35 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[10:11]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[12:13]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[14:15]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v8double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2889,6 +3299,60 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v16double:
; GFX1170-SDAG: ; %bb.0: ; %entry
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[10:11]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[12:13]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[14:15]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[16:17]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[18:19]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[20:21]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[22:23]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[24:25]
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[26:27]
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[28:29]
; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[30:31]
; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v16double:
; GFX1170-GISEL: ; %bb.0: ; %entry
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[8:9], v[16:17], v[18:19]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[10:11], v[20:21], v[22:23]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[12:13], v[24:25], v[26:27]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[14:15], v[28:29], v[30:31]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15]
; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0