
Clustered nodes were previously managed by adding weak edges between neighbouring cluster nodes, forming a sort of ordered queue, which was later recorded as `NextClusterPred` or `NextClusterSucc` in `ScheduleDAGMI`. However, instructions may not actually be picked in the exact order of the queue. For example, suppose we have a queue of cluster nodes A B C. During scheduling, node B might be picked first, in which case it is very likely that only B and C get clustered under top-down scheduling (leaving A alone).

Another issue is that the following code:
```
if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
  std::swap(SUa, SUb);
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
```
may break the cluster queue. For example, suppose we want to cluster the nodes 1 3 2 (in the order they appear in `MemOpRecords`). Normally, 1 (SUa) becomes a pred of 3 (SUb). But when we reach the pair (3, 2), since 3 (SUa) > 2 (SUb), the two nodes are swapped, which makes 2 a pred of 3. Both 1 and 2 are now preds of 3, but there is no edge between 1 and 2, so we end up with a broken cluster chain.

To fix both issues, this change introduces an unordered set. This could help improve clustering in some hard cases.

One key reason the change causes so many test check changes is that the cluster candidates are no longer ordered, so they may be picked in a different order than before.

The most affected targets are AMDGPU, AArch64, and RISCV.

For RISCV, most of the changes appear to be minor instruction reordering; I don't see any obvious regression.

For AArch64, some combining of ldr into ldp is affected, with two cases regressed and two improved. The deeper reason is that the machine scheduler cannot cluster them well either before or after the change, and the load-combine algorithm that runs later is also not smart enough.

For AMDGPU, some cases use more v_dual instructions while others regress; this seems less critical. The test `v_vselect_v32bf16` appears to get more buffer_load instructions claused.
2839 lines
129 KiB
LLVM
2839 lines
129 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
|
|
|
|
; Baseline case: @llvm.maximum.f64 on two double arguments, no fast-math flags.
; Per the checks below, GFX7 through GFX11 emit v_max_f64 plus an unordered
; compare (v_cmp_u_f64) whose result selects 0 / 0x7ff80000 for the low / high
; result dwords, while GFX12 emits a single v_maximum_f64.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them rather than editing by hand.
define double @v_maximum_f64(double %src0, double %src1) {
|
|
; GFX7-LABEL: v_maximum_f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call double @llvm.maximum.f64(double %src0, double %src1)
|
|
ret double %op
|
|
}
|
|
|
|
; Same operation as the plain f64 case, but the call carries the `nnan` flag.
; Per the checks below, GFX7 through GFX11 then emit only v_max_f64 (no
; unordered compare / select sequence); GFX12 still emits v_maximum_f64.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define double @v_maximum_f64__nnan(double %src0, double %src1) {
|
|
; GFX7-LABEL: v_maximum_f64__nnan:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_f64__nnan:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_f64__nnan:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_f64__nnan:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_f64__nnan:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_f64__nnan:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan double @llvm.maximum.f64(double %src0, double %src1)
|
|
ret double %op
|
|
}
|
|
|
|
; The call carries only the `nsz` flag. The expected output below is the same
; sequence as for the unflagged call: v_max_f64 plus the unordered-compare /
; select sequence on GFX7 through GFX11, and v_maximum_f64 on GFX12.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define double @v_maximum_f64__nsz(double %src0, double %src1) {
|
|
; GFX7-LABEL: v_maximum_f64__nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_f64__nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_f64__nsz:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_f64__nsz:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_f64__nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_f64__nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_f64__nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nsz double @llvm.maximum.f64(double %src0, double %src1)
|
|
ret double %op
|
|
}
|
|
|
|
; The call carries both `nnan` and `nsz`. The expected output below matches the
; `nnan`-only shape: a single v_max_f64 on GFX7 through GFX11 and a single
; v_maximum_f64 on GFX12.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) {
|
|
; GFX7-LABEL: v_maximum_f64__nnan_nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_f64__nnan_nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_f64__nnan_nsz:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_f64__nnan_nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_f64__nnan_nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_f64__nnan_nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan nsz double @llvm.maximum.f64(double %src0, double %src1)
|
|
ret double %op
|
|
}
|
|
|
|
; Here `nnan` is only on the fadd that produces the first operand (%src0); the
; maximum call itself has no fast-math flags. Per the checks below, GFX7 through
; GFX11 still emit the full unordered-compare / select sequence after the add,
; and GFX12 emits v_add_f64 followed by v_maximum_f64.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
|
|
; GFX7-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_f64__nnan_src0:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_add_f64_e32 v[0:1], 1.0, v[0:1]
|
|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%src0 = fadd nnan double %arg0, 1.0
|
|
%op = call double @llvm.maximum.f64(double %src0, double %src1)
|
|
ret double %op
|
|
}
|
|
|
|
; Mirror of the `__nnan_src0` case: `nnan` is only on the fadd that produces the
; second operand (%src1); the maximum call itself has no fast-math flags. Per
; the checks below, GFX7 through GFX11 still emit the full unordered-compare /
; select sequence after the add, and GFX12 emits v_add_f64 then v_maximum_f64.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
|
|
; GFX7-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
|
|
; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_f64__nnan_src1:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_add_f64_e32 v[2:3], 1.0, v[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%src1 = fadd nnan double %arg1, 1.0
|
|
%op = call double @llvm.maximum.f64(double %src0, double %src1)
|
|
ret double %op
|
|
}
|
|
|
|
; Scalar-operand variant: both arguments are `inreg`, and the checks below show
; them arriving in SGPR pairs (s[16:17]/s[18:19] or s[0:1]/s[2:3] depending on
; the target). The function returns void; the result is instead passed to a
; side-effecting inline asm ("; use $0" with an "s" constraint).
; Per the checks, GFX7 through GFX11 emit v_max_f64 plus the unordered-compare /
; select sequence, and GFX12 emits v_maximum_f64 directly on the SGPR pairs.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
|
|
; GFX7-LABEL: s_maximum_f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s18
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s19
|
|
; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
|
|
; GFX7-NEXT: v_mov_b32_e32 v4, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use v[0:1]
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: s_maximum_f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_mov_b32_e32 v0, s18
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, s19
|
|
; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
|
|
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use v[0:1]
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: s_maximum_f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_mov_b32_e32 v0, s18
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, s19
|
|
; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
|
|
; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use v[0:1]
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: s_maximum_f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
|
|
; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
|
|
; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
|
|
; GFX950-NEXT: ;;#ASMSTART
|
|
; GFX950-NEXT: ; use v[0:1]
|
|
; GFX950-NEXT: ;;#ASMEND
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: s_maximum_f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], s[16:17], s[18:19]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[16:17], s[18:19]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, 0, s4
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use v[0:1]
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: s_maximum_f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use v[0:1]
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: s_maximum_f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use v[0:1]
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call double @llvm.maximum.f64(double %src0, double %src1)
|
|
call void asm sideeffect "; use $0", "s"(double %op)
|
|
ret void
|
|
}
|
|
|
|
; Vector case: @llvm.maximum.v2f64 on <2 x double> operands, no fast-math flags.
; Per the checks below, the operation is expanded per element: GFX7 through
; GFX11 emit a v_max_f64 / v_cmp_u_f64 pair and two selects for each element,
; and GFX12 emits two v_maximum_f64 instructions.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v2f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v2f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
|
|
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v2f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
|
|
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v2f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v2f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v2f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
|
|
ret <2 x double> %op
|
|
}
|
|
|
|
; Vector case with `nnan` on the call. Per the checks below, GFX7 through GFX11
; emit just two v_max_f64 instructions (one per element, no unordered-compare /
; select sequence), and GFX12 emits two v_maximum_f64 instructions.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v2f64__nnan:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v2f64__nnan:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_v2f64__nnan:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v2f64__nnan:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v2f64__nnan:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v2f64__nnan:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
|
|
ret <2 x double> %op
|
|
}
|
|
|
|
define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
|
|
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
|
|
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
|
|
; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v2f64__nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nsz <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
|
|
ret <2 x double> %op
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v2f64 when the call carries both nnan and nsz.
; For every check prefix below the expected body is just one max instruction
; per element (v_max_f64 up to GFX11, v_maximum_f64 on GFX12) followed by the
; return; no v_cmp_u_f64 / v_cndmask_b32 sequence is expected.
; NOTE(review): the check lines look autogenerated (update_llc_test_checks.py
; style) - confirm from the file header and regenerate instead of hand-editing.
define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v2f64__nnan_nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v2f64__nnan_nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_v2f64__nnan_nsz:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v2f64__nnan_nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v2f64__nnan_nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan nsz <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
|
|
ret <2 x double> %op
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v2f64 (no fast-math flags) when both operands are
; passed inreg, so the checks read them from SGPR pairs.
; Up to GFX11 each element is expected as v_max_f64 plus v_cmp_u_f64, followed
; by v_cndmask_b32 pairs whose alternate operands are 0 and 0x7ff80000
; (presumably the low/high words of a quiet NaN - confirm). The GFX12 checks
; expect a single v_maximum_f64 per element taking the SGPR pairs directly.
define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) {
|
|
; GFX7-LABEL: s_maximum_v2f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s22
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s23
|
|
; GFX7-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s20
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s21
|
|
; GFX7-NEXT: v_max_f64 v[4:5], s[16:17], v[0:1]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
|
|
; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use v[0:3]
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: s_maximum_v2f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_mov_b32_e32 v0, s22
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, s23
|
|
; GFX8-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
|
|
; GFX8-NEXT: v_mov_b32_e32 v0, s20
|
|
; GFX8-NEXT: v_mov_b32_e32 v1, s21
|
|
; GFX8-NEXT: v_max_f64 v[4:5], s[16:17], v[0:1]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
|
|
; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use v[0:3]
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: s_maximum_v2f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_mov_b32_e32 v0, s22
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, s23
|
|
; GFX900-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
|
|
; GFX900-NEXT: v_mov_b32_e32 v0, s20
|
|
; GFX900-NEXT: v_mov_b32_e32 v1, s21
|
|
; GFX900-NEXT: v_max_f64 v[4:5], s[16:17], v[0:1]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[0:1]
|
|
; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use v[0:3]
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: s_maximum_v2f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
|
|
; GFX950-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
|
|
; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
|
|
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
|
|
; GFX950-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
|
|
; GFX950-NEXT: ;;#ASMSTART
|
|
; GFX950-NEXT: ; use v[0:3]
|
|
; GFX950-NEXT: ;;#ASMEND
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: s_maximum_v2f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[22:23]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[18:19], s[22:23]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], s[16:17], s[20:21]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, s[16:17], s[20:21]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, s5
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use v[0:3]
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: s_maximum_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[18:19]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[18:19]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], s[0:1], s[16:17]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[16:17]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use v[0:3]
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: s_maximum_v2f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], s[2:3], s[18:19]
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], s[0:1], s[16:17]
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use v[0:3]
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call <2 x double> @llvm.maximum.v2f64(<2 x double> %src0, <2 x double> %src1)
|
|
; The function returns void, so %op is consumed by an inline asm operand with
; the "s" constraint (presumably to keep the result live - confirm). Every
; prefix above expects the operand to be printed as v[0:3].
call void asm sideeffect "; use $0", "s"(<2 x double> %op)
|
|
ret void
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v3f64 with no fast-math flags on VGPR operands.
; Up to GFX11 every element is expected as v_max_f64 plus v_cmp_u_f64, with a
; pair of v_cndmask_b32 substituting 0 / 0x7ff80000 under the compare result
; (presumably a quiet-NaN pattern - confirm). The GFX950 checks reuse vcc for
; each compare and expect s_nop padding before the dependent selects; the
; other pre-GFX12 prefixes use a separate condition register per element.
; The GFX12 checks expect one v_maximum_f64 per element.
define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v3f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
|
|
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v3f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
|
|
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v3f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
|
|
; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v3f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
|
|
; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v3f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
|
|
; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v3f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
|
|
; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v3f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
|
|
ret <3 x double> %op
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v3f64 when the call carries nnan.
; Every check prefix below expects only one max instruction per element
; (v_max_f64 up to GFX11, v_maximum_f64 on GFX12) before the return; no
; v_cmp_u_f64 / v_cndmask_b32 sequence is expected.
define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v3f64__nnan:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v3f64__nnan:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_v3f64__nnan:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v3f64__nnan:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v3f64__nnan:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v3f64__nnan:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
|
|
ret <3 x double> %op
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v3f64 when the call carries only nsz.
; The expected output still contains the v_cmp_u_f64 / v_cndmask_b32 sequence
; after each v_max_f64 on every prefix before GFX12, i.e. nsz by itself does
; not remove it in these checks. The GFX12 checks expect one v_maximum_f64
; per element.
define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
|
|
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
|
|
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
|
|
; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
|
|
; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
|
|
; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
|
|
; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
|
|
; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v3f64__nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nsz <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
|
|
ret <3 x double> %op
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v3f64 when the call carries both nnan and nsz.
; Every check prefix below expects only one max instruction per element
; (v_max_f64 up to GFX11, v_maximum_f64 on GFX12) before the return; no
; v_cmp_u_f64 / v_cndmask_b32 sequence is expected.
define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v3f64__nnan_nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v3f64__nnan_nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_v3f64__nnan_nsz:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v3f64__nnan_nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v3f64__nnan_nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan nsz <3 x double> @llvm.maximum.v3f64(<3 x double> %src0, <3 x double> %src1)
|
|
ret <3 x double> %op
|
|
}
|
|
|
|
; Lowering of llvm.maximum.v4f64 with no fast-math flags on VGPR operands.
; Up to GFX11 every element is expected as v_max_f64 plus v_cmp_u_f64, with a
; pair of v_cndmask_b32 substituting 0 / 0x7ff80000 under the compare result
; (presumably a quiet-NaN pattern - confirm). The GFX950 checks reuse vcc for
; each compare and expect s_nop padding before the dependent selects; the
; other pre-GFX12 prefixes use a separate condition register per element.
; The GFX12 checks expect one v_maximum_f64 per element.
define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v4f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
|
|
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v4f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
|
|
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v4f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
|
|
; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
|
|
; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
|
|
; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v4f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v4f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v4f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
|
|
; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v4f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
|
|
ret <4 x double> %op
|
|
}
|
|
|
|
; llvm.maximum on <4 x double> with the nnan flag on the call. Every check
; prefix below expects just one max instruction per f64 element (v_max_f64, or
; v_maximum_f64 under the GFX12 prefix) and no v_cmp_u_f64/v_cndmask fixup.
define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v4f64__nnan:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v4f64__nnan:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_v4f64__nnan:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v4f64__nnan:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v4f64__nnan:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v4f64__nnan:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
|
|
ret <4 x double> %op
|
|
}
|
|
|
|
; llvm.maximum on <4 x double> with only the nsz flag on the call. Except under
; the GFX12 prefix, the expected code still pairs each v_max_f64 with a
; v_cmp_u_f64 whose result drives v_cndmask selects of 0 (low dword) and
; 0x7ff80000 (high dword) -- presumably the quiet-NaN pattern; nsz alone does
; not remove that sequence in these checks.
define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
|
|
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
|
|
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
|
|
; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
|
|
; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
|
|
; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
|
|
; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
|
|
; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v4f64__nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nsz <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
|
|
ret <4 x double> %op
|
|
}
|
|
|
|
; llvm.maximum on <4 x double> with both nnan and nsz on the call. The expected
; output is one max instruction per f64 element for every check prefix
; (v_max_f64, or v_maximum_f64 under the GFX12 prefix), with no compare/select
; sequence.
define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v4f64__nnan_nsz:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v4f64__nnan_nsz:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: v_maximum_v4f64__nnan_nsz:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v4f64__nnan_nsz:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v4f64__nnan_nsz:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
|
|
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call nnan nsz <4 x double> @llvm.maximum.v4f64(<4 x double> %src0, <4 x double> %src1)
|
|
ret <4 x double> %op
|
|
}
|
|
|
|
; llvm.maximum on <8 x double> without fast-math flags. Under every check
; prefix the expected code loads one dword from s32 into v31 before any
; arithmetic and places a wait (s_waitcnt vmcnt(0) or s_wait_loadcnt 0x0) ahead
; of the first use of v[30:31] for the last element.
; NOTE(review): v31 is presumably the one argument dword passed on the stack --
; confirm against the calling convention.
define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v8f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX7-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
|
|
; GFX7-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
|
|
; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000
|
|
; GFX7-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
|
|
; GFX7-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
|
|
; GFX7-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
|
|
; GFX7-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
|
|
; GFX7-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v34, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v16, 0, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v34, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v8, v22, 0, s[10:11]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v9, v23, v34, s[10:11]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v10, v24, 0, s[12:13]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v11, v25, v34, s[12:13]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v12, v26, 0, s[14:15]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v13, v27, v34, s[14:15]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v8f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX8-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
|
|
; GFX8-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
|
|
; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000
|
|
; GFX8-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
|
|
; GFX8-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
|
|
; GFX8-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
|
|
; GFX8-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
|
|
; GFX8-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v34, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v16, 0, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v17, v34, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[8:9]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[8:9]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v8, v22, 0, s[10:11]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v9, v23, v34, s[10:11]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v10, v24, 0, s[12:13]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v11, v25, v34, s[12:13]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v12, v26, 0, s[14:15]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v13, v27, v34, s[14:15]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v8f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX900-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
|
|
; GFX900-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
|
|
; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000
|
|
; GFX900-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
|
|
; GFX900-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
|
|
; GFX900-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
|
|
; GFX900-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
|
|
; GFX900-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v33, v34, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v16, 0, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v17, v34, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[8:9]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[8:9]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v8, v22, 0, s[10:11]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v9, v23, v34, s[10:11]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v10, v24, 0, s[12:13]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v11, v25, v34, s[12:13]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v12, v26, 0, s[14:15]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v13, v27, v34, s[14:15]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v8f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: scratch_load_dword v31, off, s32
|
|
; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
|
|
; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
|
|
; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
|
|
; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
|
|
; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
|
|
; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
|
|
; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
|
|
; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v8f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX10-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
|
|
; GFX10-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
|
|
; GFX10-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
|
|
; GFX10-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
|
|
; GFX10-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
|
|
; GFX10-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
|
|
; GFX10-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v8f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
|
|
; GFX11-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
|
|
; GFX11-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
|
|
; GFX11-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
|
|
; GFX11-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
|
|
; GFX11-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
|
|
; GFX11-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v8f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17]
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[18:19]
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[20:21]
|
|
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[22:23]
|
|
; GFX12-NEXT: v_maximum_f64 v[8:9], v[8:9], v[24:25]
|
|
; GFX12-NEXT: v_maximum_f64 v[10:11], v[10:11], v[26:27]
|
|
; GFX12-NEXT: v_maximum_f64 v[12:13], v[12:13], v[28:29]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[14:15], v[14:15], v[30:31]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call <8 x double> @llvm.maximum.v8f64(<8 x double> %src0, <8 x double> %src1)
|
|
ret <8 x double> %op
|
|
}
|
|
|
|
define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) {
|
|
; GFX7-LABEL: v_maximum_v16f64:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
|
|
; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
|
|
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
|
|
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
|
|
; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
|
|
; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_maximum_v16f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
|
|
; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
|
|
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
|
|
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
|
|
; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
|
|
; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
|
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
|
|
; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: v_maximum_v16f64:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
|
|
; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
|
|
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
|
|
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
|
|
; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
|
|
; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
|
|
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
|
|
; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX950-LABEL: v_maximum_v16f64:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
|
|
; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8
|
|
; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4
|
|
; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
|
|
; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
|
|
; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
|
|
; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
|
|
; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32
|
|
; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28
|
|
; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
|
|
; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
|
|
; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
|
|
; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
|
|
; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
|
|
; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
|
|
; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
|
|
; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
|
|
; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
|
|
; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
|
|
; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
|
|
; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
|
|
; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
|
|
; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
|
|
; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96
|
|
; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92
|
|
; GFX950-NEXT: scratch_load_dword v31, off, s32
|
|
; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104
|
|
; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse
|
|
; GFX950-NEXT: s_waitcnt vmcnt(25)
|
|
; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
|
|
; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112
|
|
; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108
|
|
; GFX950-NEXT: s_waitcnt vmcnt(25)
|
|
; GFX950-NEXT: v_max_f64 v[60:61], v[2:3], v[36:37]
|
|
; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
|
|
; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120
|
|
; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:116
|
|
; GFX950-NEXT: s_waitcnt vmcnt(25)
|
|
; GFX950-NEXT: v_max_f64 v[62:63], v[4:5], v[38:39]
|
|
; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
|
|
; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128
|
|
; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124
|
|
; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000
|
|
; GFX950-NEXT: s_waitcnt vmcnt(25)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[6:7], v[56:57]
|
|
; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
|
|
; GFX950-NEXT: s_waitcnt vmcnt(23)
|
|
; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(21)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[44:45]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(19)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[42:43]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3]
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(17)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[54:55]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(13)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[52:53]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[50:51]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[48:49]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[34:35]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
|
|
; GFX950-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[26:27], v[32:33]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[28:29], v[36:37]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc
|
|
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[38:39]
|
|
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
|
|
; GFX950-NEXT: s_nop 1
|
|
; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc
|
|
; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc
|
|
; GFX950-NEXT: v_mov_b32_e32 v0, v58
|
|
; GFX950-NEXT: v_mov_b32_e32 v1, v59
|
|
; GFX950-NEXT: v_mov_b32_e32 v2, v60
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
|
|
; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
|
|
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: v_maximum_v16f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x18
|
|
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
|
|
; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
|
; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
|
|
; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
|
|
; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
|
|
; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
|
|
; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
|
|
; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:32
|
|
; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:28
|
|
; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:68
|
|
; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:64
|
|
; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:60
|
|
; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:56
|
|
; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:52
|
|
; GFX10-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:48
|
|
; GFX10-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:44
|
|
; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
|
|
; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:100
|
|
; GFX10-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:96
|
|
; GFX10-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:92
|
|
; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:88
|
|
; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:84
|
|
; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:80
|
|
; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:76
|
|
; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:72
|
|
; GFX10-NEXT: s_waitcnt vmcnt(23)
|
|
; GFX10-NEXT: v_max_f64 v[82:83], v[0:1], v[31:32]
|
|
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(21)
|
|
; GFX10-NEXT: v_max_f64 v[84:85], v[2:3], v[33:34]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[33:34]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(19)
|
|
; GFX10-NEXT: v_max_f64 v[32:33], v[4:5], v[35:36]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[35:36]
|
|
; GFX10-NEXT: s_clause 0x7
|
|
; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:112
|
|
; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104
|
|
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:108
|
|
; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120
|
|
; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116
|
|
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:128
|
|
; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:124
|
|
; GFX10-NEXT: s_waitcnt vmcnt(24)
|
|
; GFX10-NEXT: v_max_f64 v[34:35], v[6:7], v[48:49]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[48:49]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(21)
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[52:53]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(19)
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[54:55]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(17)
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[64:65]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(16)
|
|
; GFX10-NEXT: v_max_f64 v[48:49], v[8:9], v[37:38]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[37:38]
|
|
; GFX10-NEXT: v_max_f64 v[36:37], v[10:11], v[64:65]
|
|
; GFX10-NEXT: v_max_f64 v[38:39], v[12:13], v[54:55]
|
|
; GFX10-NEXT: v_max_f64 v[54:55], v[14:15], v[52:53]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX10-NEXT: v_max_f64 v[64:65], v[20:21], v[70:71]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX10-NEXT: v_max_f64 v[52:53], v[16:17], v[50:51]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[50:51]
|
|
; GFX10-NEXT: v_max_f64 v[50:51], v[18:19], v[80:81]
|
|
; GFX10-NEXT: v_max_f64 v[70:71], v[22:23], v[68:69]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v6, v34, 0, s6
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v7, v35, 0x7ff80000, s6
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, 0, s7
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, 0x7ff80000, s7
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v10, v36, 0, s8
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v11, v37, 0x7ff80000, s8
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v12, v38, 0, s9
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v14, v54, 0, s10
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v15, v55, 0x7ff80000, s10
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v16, v52, 0, s11
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v17, v53, 0x7ff80000, s11
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v18, v50, 0, s12
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v19, v51, 0x7ff80000, s12
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v20, v64, 0, s13
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v21, v65, 0x7ff80000, s13
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v22, v70, 0, s14
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v23, v71, 0x7ff80000, s14
|
|
; GFX10-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-NEXT: v_max_f64 v[68:69], v[24:25], v[66:67]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-NEXT: v_max_f64 v[66:67], v[26:27], v[0:1]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s16, v[26:27], v[0:1]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-NEXT: v_max_f64 v[80:81], v[28:29], v[2:3]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_max_f64 v[86:87], v[30:31], v[4:5]
|
|
; GFX10-NEXT: v_cmp_u_f64_e64 s18, v[30:31], v[4:5]
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, v82, 0, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, v83, 0x7ff80000, vcc_lo
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v2, v84, 0, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v3, v85, 0x7ff80000, s4
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v4, v32, 0, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v5, v33, 0x7ff80000, s5
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v24, v68, 0, s15
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v26, v66, 0, s16
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v28, v80, 0, s17
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v29, v81, 0x7ff80000, s17
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v30, v86, 0, s18
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: v_maximum_v16f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1f
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:72
|
|
; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:68
|
|
; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:80
|
|
; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:76
|
|
; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:88
|
|
; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:84
|
|
; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:96
|
|
; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:92
|
|
; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:104
|
|
; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:100
|
|
; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:112
|
|
; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:108
|
|
; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:120
|
|
; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:116
|
|
; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:128
|
|
; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124
|
|
; GFX11-NEXT: s_waitcnt vmcnt(30)
|
|
; GFX11-NEXT: v_max_f64 v[96:97], v[0:1], v[32:33]
|
|
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(28)
|
|
; GFX11-NEXT: v_max_f64 v[32:33], v[2:3], v[34:35]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(26)
|
|
; GFX11-NEXT: v_max_f64 v[34:35], v[4:5], v[36:37]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(24)
|
|
; GFX11-NEXT: v_max_f64 v[36:37], v[6:7], v[38:39]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(22)
|
|
; GFX11-NEXT: v_max_f64 v[38:39], v[8:9], v[48:49]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(20)
|
|
; GFX11-NEXT: v_max_f64 v[48:49], v[10:11], v[50:51]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(18)
|
|
; GFX11-NEXT: v_max_f64 v[50:51], v[12:13], v[52:53]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(16)
|
|
; GFX11-NEXT: v_max_f64 v[52:53], v[14:15], v[54:55]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX11-NEXT: v_max_f64 v[54:55], v[16:17], v[64:65]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(12)
|
|
; GFX11-NEXT: v_max_f64 v[64:65], v[18:19], v[66:67]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(10)
|
|
; GFX11-NEXT: v_max_f64 v[66:67], v[20:21], v[68:69]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: v_max_f64 v[68:69], v[22:23], v[70:71]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-NEXT: v_max_f64 v[70:71], v[24:25], v[80:81]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: v_max_f64 v[80:81], v[26:27], v[82:83]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: v_max_f64 v[82:83], v[28:29], v[84:85]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_max_f64 v[84:85], v[30:31], v[86:87]
|
|
; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: v_maximum_v16f64:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_clause 0x1f
|
|
; GFX12-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX12-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX12-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX12-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX12-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX12-NEXT: scratch_load_b32 v37, off, s32 offset:24
|
|
; GFX12-NEXT: scratch_load_b32 v36, off, s32 offset:20
|
|
; GFX12-NEXT: scratch_load_b32 v39, off, s32 offset:32
|
|
; GFX12-NEXT: scratch_load_b32 v38, off, s32 offset:28
|
|
; GFX12-NEXT: scratch_load_b32 v49, off, s32 offset:40
|
|
; GFX12-NEXT: scratch_load_b32 v48, off, s32 offset:36
|
|
; GFX12-NEXT: scratch_load_b32 v51, off, s32 offset:48
|
|
; GFX12-NEXT: scratch_load_b32 v50, off, s32 offset:44
|
|
; GFX12-NEXT: scratch_load_b32 v53, off, s32 offset:56
|
|
; GFX12-NEXT: scratch_load_b32 v52, off, s32 offset:52
|
|
; GFX12-NEXT: scratch_load_b32 v55, off, s32 offset:64
|
|
; GFX12-NEXT: scratch_load_b32 v54, off, s32 offset:60
|
|
; GFX12-NEXT: scratch_load_b32 v65, off, s32 offset:72
|
|
; GFX12-NEXT: scratch_load_b32 v64, off, s32 offset:68
|
|
; GFX12-NEXT: scratch_load_b32 v67, off, s32 offset:80
|
|
; GFX12-NEXT: scratch_load_b32 v66, off, s32 offset:76
|
|
; GFX12-NEXT: scratch_load_b32 v69, off, s32 offset:88
|
|
; GFX12-NEXT: scratch_load_b32 v68, off, s32 offset:84
|
|
; GFX12-NEXT: scratch_load_b32 v71, off, s32 offset:96
|
|
; GFX12-NEXT: scratch_load_b32 v70, off, s32 offset:92
|
|
; GFX12-NEXT: scratch_load_b32 v81, off, s32 offset:104
|
|
; GFX12-NEXT: scratch_load_b32 v80, off, s32 offset:100
|
|
; GFX12-NEXT: scratch_load_b32 v83, off, s32 offset:112
|
|
; GFX12-NEXT: scratch_load_b32 v82, off, s32 offset:108
|
|
; GFX12-NEXT: scratch_load_b32 v85, off, s32 offset:120
|
|
; GFX12-NEXT: scratch_load_b32 v84, off, s32 offset:116
|
|
; GFX12-NEXT: scratch_load_b32 v87, off, s32 offset:128
|
|
; GFX12-NEXT: scratch_load_b32 v86, off, s32 offset:124
|
|
; GFX12-NEXT: s_wait_loadcnt 0x1e
|
|
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x1c
|
|
; GFX12-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x1a
|
|
; GFX12-NEXT: v_maximum_f64 v[4:5], v[4:5], v[36:37]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x18
|
|
; GFX12-NEXT: v_maximum_f64 v[6:7], v[6:7], v[38:39]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x16
|
|
; GFX12-NEXT: v_maximum_f64 v[8:9], v[8:9], v[48:49]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x14
|
|
; GFX12-NEXT: v_maximum_f64 v[10:11], v[10:11], v[50:51]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x12
|
|
; GFX12-NEXT: v_maximum_f64 v[12:13], v[12:13], v[52:53]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x10
|
|
; GFX12-NEXT: v_maximum_f64 v[14:15], v[14:15], v[54:55]
|
|
; GFX12-NEXT: s_wait_loadcnt 0xe
|
|
; GFX12-NEXT: v_maximum_f64 v[16:17], v[16:17], v[64:65]
|
|
; GFX12-NEXT: s_wait_loadcnt 0xc
|
|
; GFX12-NEXT: v_maximum_f64 v[18:19], v[18:19], v[66:67]
|
|
; GFX12-NEXT: s_wait_loadcnt 0xa
|
|
; GFX12-NEXT: v_maximum_f64 v[20:21], v[20:21], v[68:69]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x8
|
|
; GFX12-NEXT: v_maximum_f64 v[22:23], v[22:23], v[70:71]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x6
|
|
; GFX12-NEXT: v_maximum_f64 v[24:25], v[24:25], v[80:81]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x4
|
|
; GFX12-NEXT: v_maximum_f64 v[26:27], v[26:27], v[82:83]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x2
|
|
; GFX12-NEXT: v_maximum_f64 v[28:29], v[28:29], v[84:85]
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: v_maximum_f64 v[30:31], v[30:31], v[86:87]
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%op = call <16 x double> @llvm.maximum.v16f64(<16 x double> %src0, <16 x double> %src1)
|
|
ret <16 x double> %op
|
|
}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GCN: {{.*}}
|