
The existing way of managing clustered nodes was to add weak edges between neighbouring cluster nodes, forming a sort of ordered queue. This is later recorded as `NextClusterPred` or `NextClusterSucc` in `ScheduleDAGMI`. However, the instructions may not be picked in the exact order of the queue. For example, suppose we have a queue of cluster nodes A B C. During scheduling, node B might be picked first; it is then very likely that only B and C get clustered under Top-Down scheduling (leaving A alone).

Another issue is that

```
if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
  std::swap(SUa, SUb);
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
```

may break the cluster queue. For example, suppose we want to cluster nodes in the order 1 3 2 (the order in `MemOpRecords`). Normally 1 (SUa) becomes a pred of 3 (SUb). But when it comes to the pair (3, 2), since 3 (SUa) > 2 (SUb), the two nodes are swapped, which makes 2 a pred of 3. Both 1 and 2 thus become preds of 3, but there is no edge between 1 and 2, so we get a broken cluster chain.

To fix both issues, this change introduces an unordered set. This can help improve clustering in some hard cases.

One key reason the change causes so many test check changes is that, because the cluster candidates are no longer ordered, they might be picked in a different order than before.

The most affected targets are AMDGPU, AArch64, and RISCV.

For RISCV, most changes seem to be minor instruction reordering; I don't see any obvious regression.

For AArch64, some combining of ldr into ldp is affected, with two cases regressed and two improved. The deeper reason is that the machine scheduler cannot cluster them well either before or after the change, and the load-combine algorithm that runs later is also not smart enough.

For AMDGPU, some cases use more v_dual instructions while others regress. This seems less critical. It seems that test `v_vselect_v32bf16` gets more buffer_load instructions claused.
4993 lines
214 KiB
LLVM
4993 lines
214 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
|
|
|
|
; Stores an i1 argument through a poison addrspace(1) pointer.
; The checks expect v0 to be masked with 1 (v_and_b32) before a byte-sized
; buffer store.
define void @void_func_i1(i1 %arg0) #0 {
; CIGFX89-LABEL: void_func_i1:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i1 %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Zero-extends a `zeroext` i1 argument to i32, adds 12 and stores the result.
; The checks expect a single v_or_b32 of v0 with 12 and no separate masking
; instruction before the dword store.
define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CIGFX89-LABEL: void_func_i1_zeroext:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_or_b32_e32 v0, 12, v0
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_or_b32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Sign-extends a `signext` i1 argument to i32, adds 12 and stores the result.
; The checks expect one add of 12 to v0 (the mnemonic differs per target) and
; no separate extension instruction before the dword store.
define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CI-LABEL: void_func_i1_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i1_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i1_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = sext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Branches directly on an i1 argument: bb1 (a volatile store of 0) runs only
; when %arg is false, otherwise control goes straight to bb2.
; The checks expect v0 to be masked with 1 and compared against 1, with the
; store placed in a region that s_cbranch_execz can skip.
define void @i1_arg_i1_use(i1 %arg) #0 {
; CIGFX89-LABEL: i1_arg_i1_use:
; CIGFX89: ; %bb.0: ; %bb
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CIGFX89-NEXT: s_cbranch_execz .LBB3_2
; CIGFX89-NEXT: ; %bb.1: ; %bb1
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: v_mov_b32_e32 v0, 0
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: .LBB3_2: ; %bb2
; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5]
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_arg_i1_use:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s0, exec_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmpx_ne_u32_e32 1, v0
; GFX11-NEXT: s_cbranch_execz .LBB3_2
; GFX11-NEXT: ; %bb.1: ; %bb1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB3_2: ; %bb2
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
bb:
  br i1 %arg, label %bb2, label %bb1

bb1:
  store volatile i32 0, ptr addrspace(1) poison
  br label %bb2

bb2:
  ret void
}
|
|
|
|
; Stores an i8 argument through a poison addrspace(1) pointer.
; The checks expect a byte-sized buffer store of v0 with no extra masking or
; extension instruction.
define void @void_func_i8(i8 %arg0) #0 {
; CIGFX89-LABEL: void_func_i8:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i8 %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Zero-extends a `zeroext` i8 argument to i32, adds 12 and stores the result.
; The checks expect only the add of 12 to v0 (mnemonic differs per target),
; with no separate extension instruction before the dword store.
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CI-LABEL: void_func_i8_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Sign-extends a `signext` i8 argument to i32, adds 12 and stores the result.
; The checks expect only the add of 12 to v0 (mnemonic differs per target),
; with no separate extension instruction before the dword store.
define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CI-LABEL: void_func_i8_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = sext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores an i16 argument through a poison addrspace(1) pointer.
; The checks expect a 16-bit buffer store of v0 with no extra masking or
; extension instruction.
define void @void_func_i16(i16 %arg0) #0 {
; CIGFX89-LABEL: void_func_i16:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i16 %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Zero-extends a `zeroext` i16 argument to i32, adds 12 and stores the result.
; The checks expect only the add of 12 to v0 (mnemonic differs per target),
; with no separate extension instruction before the dword store.
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CI-LABEL: void_func_i16_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Sign-extends a `signext` i16 argument to i32, adds 12 and stores the result.
; The checks expect only the add of 12 to v0 (mnemonic differs per target),
; with no separate extension instruction before the dword store.
define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CI-LABEL: void_func_i16_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = sext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores an i32 argument through a poison addrspace(1) pointer.
; The checks expect a single dword-sized buffer store of v0.
define void @void_func_i32(i32 %arg0) #0 {
; CIGFX89-LABEL: void_func_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i32 %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores an i64 argument through a poison addrspace(1) pointer.
; The checks expect a single two-dword buffer store of v[0:1].
define void @void_func_i64(i64 %arg0) #0 {
; CIGFX89-LABEL: void_func_i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i64 %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a half argument through a poison addrspace(1) pointer.
; The hawaii checks expect a v_cvt_f16_f32 of v0 before the 16-bit store;
; the remaining targets are expected to store v0 directly.
define void @void_func_f16(half %arg0) #0 {
; CI-LABEL: void_func_f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store half %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a float argument through a poison addrspace(1) pointer.
; The checks expect a single dword-sized buffer store of v0.
define void @void_func_f32(float %arg0) #0 {
; CIGFX89-LABEL: void_func_f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store float %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a double argument through a poison addrspace(1) pointer.
; The checks expect a single two-dword buffer store of v[0:1].
define void @void_func_f64(double %arg0) #0 {
; CIGFX89-LABEL: void_func_f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store double %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <2 x i32> argument through a poison addrspace(1) pointer.
; The checks expect a single two-dword buffer store of v[0:1].
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <3 x i32> argument through a poison addrspace(1) pointer.
; The checks expect a single three-dword buffer store of v[0:2].
define void @void_func_v3i32(<3 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <4 x i32> argument through a poison addrspace(1) pointer.
; The checks expect a single four-dword buffer store of v[0:3].
define void @void_func_v4i32(<4 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <5 x i32> argument through a poison addrspace(1) pointer.
; The checks expect two stores: a dword store of v4 followed by a four-dword
; store of v[0:3]. On gfx1100 the pair is preceded by s_clause 0x1.
define void @void_func_v5i32(<5 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v4, off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <5 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores an <8 x i32> argument through a poison addrspace(1) pointer.
; The checks expect two four-dword stores, v[4:7] then v[0:3]. On gfx1100
; the pair is preceded by s_clause 0x1.
define void @void_func_v8i32(<8 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <16 x i32> argument through a poison addrspace(1) pointer.
; The checks expect four four-dword stores covering v[0:15], emitted from the
; highest register group down. On gfx1100 they are preceded by s_clause 0x3.
define void @void_func_v16i32(<16 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <32 x i32> argument through a poison addrspace(1) pointer.
; The checks expect v31 to be filled by a load addressed through s32, and the
; store of v[28:31] to be issued only after an s_waitcnt on that load.
define void @void_func_v32i32(<32 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v32i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <32 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; 1 over register limit
; Stores a <33 x i32> argument through a poison addrspace(1) pointer.
; The checks expect two loads addressed through s32 (offset 0 into v31 and
; offset 4 into a second register), each followed by an s_waitcnt before the
; store that consumes the loaded value.
define void @void_func_v33i32(<33 x i32> %arg0) #0 {
; CI-LABEL: void_func_v33i32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(6)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(6)
; CI-NEXT: buffer_store_dword v20, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v33i32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(6)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(6)
; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v33i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(6)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(6)
; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v33i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x5
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b32 v32, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <33 x i32> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <2 x i64> argument through a poison addrspace(1) pointer.
; The checks expect a single four-dword buffer store of v[0:3].
define void @void_func_v2i64(<2 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i64> %arg0, ptr addrspace(1) poison
  ret void
}
|
|
|
|
; Stores a <3 x i64> argument to a poison global (addrspace 1) pointer.
; The assertions expect the store to be split in two, with the 64-bit tail
; v[4:5] emitted before the 128-bit part v[0:3]. On the GFX11 run both
; stores sit under one s_clause 0x1.
define void @void_func_v3i64(<3 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i64> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i64> argument to a poison global (addrspace 1) pointer.
; The assertions expect two 128-bit buffer stores, v[4:7] first and then
; v[0:3]. On the GFX11 run they are grouped under s_clause 0x1.
define void @void_func_v4i64(<4 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i64> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <5 x i64> argument to a poison global (addrspace 1) pointer.
; The assertions expect three stores in this order, 128-bit v[4:7],
; 128-bit v[0:3], then the 64-bit tail v[8:9]. On the GFX11 run all three
; are grouped under s_clause 0x2.
define void @void_func_v5i64(<5 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v5i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x2
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b64 v[8:9], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i64> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x i64> argument to a poison global (addrspace 1) pointer.
; The assertions expect four 128-bit buffer stores emitted from the
; highest register quad down, v[12:15], v[8:11], v[4:7], v[0:3]. On the
; GFX11 run they are grouped under s_clause 0x3.
define void @void_func_v8i64(<8 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i64> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i64> argument to a poison global (addrspace 1) pointer.
; The assertions expect v31 to be loaded through s32 before the store of
; v[28:31], and a wait on that load ahead of that store (presumably the
; last argument dword is passed on the stack; confirm against the calling
; convention). Eight 128-bit buffer stores are expected in total. On the
; GFX11 run they form two s_clause 0x3 groups separated by the
; s_waitcnt vmcnt(0) that guards the loaded v31.
define void @void_func_v16i64(<16 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i64> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i8> argument to the null global (addrspace 1) pointer.
; The assertions expect the two bytes arriving in v0 and v1 to be packed
; into one 16-bit value (v1 shifted left by 8 and OR-ed with the low byte
; of v0) and written with a single 16-bit buffer store at base address 0.
; The packing sequence differs per run (shift/and/or on CI, an SDWA or on
; GFX89, 16-bit register halves on GFX11 true16).
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v2i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v2i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i16> argument to a poison global (addrspace 1) pointer.
; On the CI run the assertions expect the two halves in v0 and v1 to be
; packed into one dword (v1 shifted left by 16, OR-ed with the low 16 bits
; of v0) before the store. On the GFX89 and GFX11 runs v0 is stored
; directly with no packing instructions.
define void @void_func_v2i16(<2 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i16> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x i8> argument to the null global (addrspace 1) pointer.
; The assertions expect two stores. The third byte (v2) is written with a
; byte store while the 64-bit base register pair holds 2, then the base is
; reset to 0 and the packed first two bytes (v0 and v1 combined into a
; 16-bit value) are written with a 16-bit store.
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: s_mov_b64 s[4:5], 2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 2
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v3i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 2
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v2, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v3i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 2
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v2, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i8> argument to the null global (addrspace 1) pointer.
; The assertions expect the four bytes arriving in v0..v3 to be packed
; into a single dword in v0 and written with one 32-bit buffer store at
; base address 0. Only the packing sequence differs between the runs.
define void @void_func_v4i8(<4 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v4i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v3.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.h, v1.l
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v4i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <5 x i8> argument to the null global (addrspace 1) pointer.
; The assertions expect two stores. The fifth byte (v4) is written with a
; byte store while the 64-bit base register pair holds 4, then the base is
; reset to 0 and the first four bytes, packed into one dword in v0, are
; written with a 32-bit store.
define void @void_func_v5i8(<5 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v5i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b64 s[4:5], 4
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v5i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 4
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v5i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v3.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 4
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.h, v1.l
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v4, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v5i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 4
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v4, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x i8> argument to the null global (addrspace 1) pointer.
; The assertions expect the eight bytes arriving in v0..v7 to be packed
; into two dwords and written with a single 64-bit buffer store at base
; address 0. The packed pair is v[3:4] on the CI and GFX89 runs and v[0:1]
; on the GFX11 runs.
define void @void_func_v8i8(<8 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v2
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v8i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.h, 8, v5.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v7.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.h, 8, v3.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v4.l, v4.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v0.h, v5.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v2.l, v1.h
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v4
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v1
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v8i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v4
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v5
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v1
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i8> argument to the null global (addrspace 1) pointer.
; The assertions expect the sixteen bytes arriving in v0..v15 to be packed
; into four dwords and written with a single 128-bit buffer store at base
; address 0. The packed quad is v[9:12] on the CI and GFX89 runs and
; v[0:3] on the GFX11 runs.
define void @void_func_v16i8(<16 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15
|
|
; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v14, v15, v14
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v10, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v14
|
|
; CI-NEXT: v_or_b32_e32 v11, v8, v10
|
|
; CI-NEXT: v_or_b32_e32 v10, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v9, v0, v2
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v16i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v13.l, 8, v13.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v12.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v9.h, 8, v15.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v14.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v9.l, 8, v9.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.h, 8, v11.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v10.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v5.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v7.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.h, 8, v3.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v8.h, v13.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v9.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v9.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v5.h, v4.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v5.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v0.h, v7.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v2.l, v1.h
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff, v13
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v3
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v9, v12
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v8, v2
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v4, v5
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v6
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v16i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v13, 8, v13
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v15, 8, v15
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v9, 8, v9
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v11, 8, v11
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v14, v15
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff, v12
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v13
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v10
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v9, v12
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v8, v2
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v4, v5
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v6
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; void_func_v32i8: takes a <32 x i8> argument and stores it to a null
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for four check prefixes (CI, GFX89, GFX11-TRUE16 and
; GFX11-FAKE16). Every expected sequence contains exactly one load addressed
; off s32 (presumably the one element that was not passed in a VGPR -- TODO
; confirm against the calling convention) and ends with two 128-bit buffer
; stores, the first with the base set to 16 and the second with the base set
; to 0.
; NOTE(review): the expectations look machine-generated; regenerate them with
; the test-update script instead of editing by hand -- confirm against the
; file header, which is outside this view.
define void @void_func_v32i8(<32 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v32i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_and_b32_e32 v9, 0xff, v14
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 24, v15
|
|
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v29
|
|
; CI-NEXT: v_and_b32_e32 v14, 0xff, v28
|
|
; CI-NEXT: v_and_b32_e32 v26, 0xff, v26
|
|
; CI-NEXT: v_lshlrev_b32_e32 v25, 8, v25
|
|
; CI-NEXT: v_and_b32_e32 v24, 0xff, v24
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v1, v1, v9
|
|
; CI-NEXT: v_or_b32_e32 v9, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v27
|
|
; CI-NEXT: v_and_b32_e32 v27, 0xff, v30
|
|
; CI-NEXT: v_or_b32_e32 v13, v14, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v26
|
|
; CI-NEXT: v_or_b32_e32 v7, v3, v2
|
|
; CI-NEXT: v_or_b32_e32 v3, v10, v1
|
|
; CI-NEXT: v_or_b32_e32 v1, v4, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v27
|
|
; CI-NEXT: v_or_b32_e32 v11, v15, v14
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xffff, v13
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v7
|
|
; CI-NEXT: v_or_b32_e32 v2, v8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v20
|
|
; CI-NEXT: v_and_b32_e32 v9, 0xff, v16
|
|
; CI-NEXT: s_mov_b64 s[4:5], 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v5
|
|
; CI-NEXT: v_or_b32_e32 v5, v24, v25
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v26
|
|
; CI-NEXT: v_or_b32_e32 v6, v5, v11
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xff, v22
|
|
; CI-NEXT: v_or_b32_e32 v7, v12, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v23
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v21
|
|
; CI-NEXT: v_or_b32_e32 v5, v8, v5
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v18
|
|
; CI-NEXT: v_or_b32_e32 v5, v5, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v19
|
|
; CI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v8, 8, v17
|
|
; CI-NEXT: v_or_b32_e32 v8, v9, v8
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v4, v8, v4
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b64 s[4:5], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_load_ubyte v10, off, s[0:3], s32
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v7, 8, v7
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v11, 8, v29
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v14, 8, v25
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v27
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v21, 8, v21
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v23, 8, v23
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v17, 8, v17
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v19, 8, v19
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v6, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v7, v28, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v11, v24, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v14, v26, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v15, v20, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v20, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v16, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v17, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 16
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v6, v11, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v15, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v10
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v32i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v31, off, s32
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v12.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v5.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v7.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.h, 8, v21.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v20.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v13.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v2.h, 8, v15.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v14.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.h, 8, v9.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v8.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v6.h, 8, v11.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v10.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v3.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.l, 8, v29.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v28.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v30.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v9.h, 8, v25.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v24.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.h, 8, v27.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v26.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.h, 8, v23.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v22.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v13.h, 8, v17.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v16.l
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v14.h, 8, v19.l
|
|
; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v18.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v5.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v6.l, v7.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v12.l, v11.h
|
|
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v0.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v1.h, v0.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v3.h, v2.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v5.h, v4.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v7.h, v6.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v2.l, v3.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v8.h, v8.l
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v10.l, v9.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v11.l, v10.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v13.l, v12.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v14.l, v13.h
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v15.l, v14.h
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v16
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v17
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v18
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v19
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff, v4
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v5
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 16, v1
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v2
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v3
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v6
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v13, v14
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v4, v5
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v15, v16
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v7, v8
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 16
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, 8, v31.l
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v9.l, v0.l
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff, v10
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v11
|
|
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v9, v10
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v1, v0
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v17, v18
|
|
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v12, v19
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v32i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v31, off, s32
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v9, 8, v9
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v11, 8, v11
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v17, 8, v17
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v13, 8, v13
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v15, 8, v15
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v29, 8, v29
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xff, v28
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xff, v30
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v25, 8, v25
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xff, v24
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v27, 8, v27
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xff, v26
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v21, 8, v21
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xff, v20
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v23, 8, v23
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v19, 8, v19
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v18
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v10, v11
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v16, v17
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v14, v15
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v28, v29
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v24, v25
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v26, v27
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v20, v21
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v22, v23
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v18, v19
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v4
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v5
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v2
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v3
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v6
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v4, v5
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v7, v10
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v11, v14
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v12, v13
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v8, v9
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v17
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 16
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v31
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v30, v1
|
|
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v18, v1
|
|
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v15, v16
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store <32 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; void_func_v3i16: takes a <3 x i16> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for three check prefixes (CI, GFX89 and GFX11). All
; three expect a 16-bit store followed by a 32-bit store; the CI expectations
; first combine v0 and v1 with a shift/and/or sequence, and the GFX11
; expectations place the two stores after an s_clause 0x1.
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v3i16(<3 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i16> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v4i16: takes a <4 x i16> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for three check prefixes (CI, GFX89 and GFX11). All
; three expect a single 64-bit store; the CI expectations first combine
; v0..v3 pairwise with shift/and/or and store v[1:2], while the other two
; store v[0:1] directly.
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v4i16(<4 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v2, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i16> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v5i16: takes a <5 x i16> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for three check prefixes (CI, GFX89 and GFX11). All
; three expect a 16-bit store followed by a 64-bit store; the CI expectations
; first combine v0..v3 pairwise with shift/and/or, and the GFX11 expectations
; place the two stores after an s_clause 0x1.
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v5i16(<5 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v5i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v2, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v0, v1
|
|
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v5i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i16> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v8i16: takes an <8 x i16> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for three check prefixes (CI, GFX89 and GFX11). All
; three expect a single 128-bit store; the CI expectations first combine
; v0..v7 pairwise with shift/and/or and store v[3:6], while the other two
; store v[0:3] directly.
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v8i16(<8 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v6, v6, v7
|
|
; CI-NEXT: v_or_b32_e32 v5, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i16> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v16i16: takes a <16 x i16> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for three check prefixes (CI, GFX89 and GFX11). All
; three expect two back-to-back 128-bit stores; the CI expectations first
; combine v0..v15 pairwise with shift/and/or and store v[11:14] then v[3:6],
; while the other two store v[4:7] then v[0:3] directly (after an
; s_clause 0x1 in the GFX11 case).
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v16i16(<16 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v5, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14
|
|
; CI-NEXT: v_or_b32_e32 v14, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v13, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10
|
|
; CI-NEXT: v_or_b32_e32 v12, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; CI-NEXT: v_or_b32_e32 v11, v1, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v6, v6, v7
|
|
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i16> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v2i24: extracts both elements of a <2 x i24> argument, adds them
; as i24, and stores the sum to a poison addrspace(1) pointer, then returns.
; The comment lines inside the body are FileCheck expectations for five check
; prefixes (CI, VI, GFX9, GFX11-TRUE16 and GFX11-FAKE16). Each expects one
; add of v0 and v1 and then a byte store plus a 16-bit store of the result.
; The GFX11-TRUE16 expectations differ from the rest: they obtain the byte
; with 16-bit register moves instead of a 16-bit right shift, and they list
; the 16-bit store before the byte store.
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i24:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v2i24:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
|
|
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v2i24:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v2i24:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
|
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
|
|
; GFX11-TRUE16-NEXT: s_clause 0x1
|
|
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v2i24:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX11-FAKE16-NEXT: s_clause 0x1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%elt0 = extractelement <2 x i24> %arg0, i32 0
|
|
%elt1 = extractelement <2 x i24> %arg0, i32 1
|
|
%add = add i24 %elt0, %elt1
|
|
store i24 %add, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v2f32: takes a <2 x float> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for two check prefixes (CIGFX89 and GFX11); both
; expect a single 64-bit store of v[0:1].
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v2f32(<2 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x float> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v3f32: takes a <3 x float> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for two check prefixes (CIGFX89 and GFX11); both
; expect a single 96-bit store of v[0:2].
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v3f32(<3 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x float> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v4f32: takes a <4 x float> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for two check prefixes (CIGFX89 and GFX11); both
; expect a single 128-bit store of v[0:3].
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v4f32(<4 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x float> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; void_func_v8f32: takes an <8 x float> argument and stores it to a poison
; addrspace(1) pointer, then returns. The comment lines inside the body are
; FileCheck expectations for two check prefixes (CIGFX89 and GFX11); both
; expect two back-to-back 128-bit stores, v[4:7] then v[0:3], and the GFX11
; expectations place them after an s_clause 0x1.
; NOTE(review): the expectations look machine-generated; regenerate rather
; than hand-edit -- confirm against the file header, outside this view.
define void @void_func_v8f32(<8 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x float> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <16 x float> by value and stores it to a poison addrspace(1) pointer.
; The checks expect the argument in v[0:15], written as four 128-bit stores in
; descending register order; on GFX11 the four stores form one s_clause 0x3.
define void @void_func_v16f32(<16 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x float> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <2 x double> by value and stores it to a poison addrspace(1) pointer.
; The checks expect the two doubles in v[0:3] and a single 128-bit buffer
; store.
define void @void_func_v2f64(<2 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x double> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <3 x double> by value and stores it to a poison addrspace(1) pointer.
; The checks expect the argument in v[0:5], written as a 64-bit store of
; v[4:5] followed by a 128-bit store of v[0:3]; on GFX11 the pair is wrapped
; in s_clause 0x1.
define void @void_func_v3f64(<3 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x double> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <4 x double> by value and stores it to a poison addrspace(1) pointer.
; The checks expect the argument in v[0:7], written as two 128-bit stores with
; v[4:7] emitted before v[0:3]; on GFX11 the pair is wrapped in s_clause 0x1.
define void @void_func_v4f64(<4 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x double> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <8 x double> by value and stores it to a poison addrspace(1) pointer.
; The checks expect the argument in v[0:15], written as four 128-bit stores in
; descending register order; on GFX11 the four stores form one s_clause 0x3.
define void @void_func_v8f64(<8 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x double> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <16 x double> by value and stores it to a poison addrspace(1)
; pointer. The checks expect v31 to be loaded from the stack at s32 before
; use, so the store of v[28:31] is preceded by an s_waitcnt on that load.
; The eight 128-bit stores are not emitted in register order; on GFX11 they
; are split into two s_clause 0x3 groups around the wait.
define void @void_func_v16f64(<16 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x double> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <2 x half> by value and stores it to a poison addrspace(1) pointer.
; On CI the checks convert v0 and v1 with v_cvt_f16_f32 and pack them into one
; dword with a shift and an or; on GFX89 and GFX11 v0 is stored directly as a
; single dword.
define void @void_func_v2f16(<2 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x half> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; FIXME: The ABI differs depending on whether f16 is a legal type.
|
|
; Takes a <3 x half> by value and stores it to a poison addrspace(1) pointer.
; On CI the checks convert v0, v1 and v2 with v_cvt_f16_f32, pack v0/v1 into
; one dword, and store v2 as a short plus v0 as a dword. On GFX89 and GFX11
; the short comes from v1 and the dword from v0 with no conversion.
define void @void_func_v3f16(<3 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x half> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <4 x half> by value and stores it to a poison addrspace(1) pointer.
; On CI the checks convert v0..v3 with v_cvt_f16_f32 and pack them pairwise
; into v[0:1] before a 64-bit store; on GFX89 and GFX11 v[0:1] is stored
; directly.
define void @void_func_v4f16(<4 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v2, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x half> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <8 x half> by value and stores it to a poison addrspace(1) pointer.
; On CI the checks convert v0..v7 with v_cvt_f16_f32 and pack them pairwise
; into v[2:5] before a 128-bit store; on GFX89 and GFX11 v[0:3] is stored
; directly.
define void @void_func_v8f16(<8 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v8, v5
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
|
; CI-NEXT: v_or_b32_e32 v5, v6, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v3, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x half> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a <16 x half> by value and stores it to a poison addrspace(1) pointer.
; On CI the checks convert v0..v15 with v_cvt_f16_f32 and pack them pairwise
; into v[10:13] and v[2:5], which are written with two 128-bit stores. On
; GFX89 and GFX11 v[4:7] and v[0:3] are stored directly (GFX11 wraps the pair
; in s_clause 0x1).
define void @void_func_v16f16(<16 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v16, v5
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v5, v6, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16
|
|
; CI-NEXT: v_or_b32_e32 v3, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v15
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v14
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v13
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v12
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_or_b32_e32 v13, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v12, v7, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v11
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v10
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v9
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_or_b32_e32 v11, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v10, v7, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x half> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Make sure there is no alignment requirement for passed vgprs.
|
|
; Takes (i32, i64, i32) and stores each argument with a volatile store to a
; poison addrspace(1) pointer. The checks expect the i32 in v0, the i64 in the
; odd-based pair v[1:2], and the last i32 in v3; each volatile store is
; followed by its own wait.
define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_i32_i64_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dword v3, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i32_i64_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile i32 %arg0, ptr addrspace(1) poison
|
|
store volatile i64 %arg1, ptr addrspace(1) poison
|
|
store volatile i32 %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a single-element struct { i32 } by value and stores it to a poison
; addrspace(1) pointer. The checks expect the field in v0 and one dword store.
define void @void_func_struct_i32({ i32 } %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_struct_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_struct_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store { i32 } %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a struct { i8, i32 } by value and stores it to a poison addrspace(1)
; pointer. The checks expect the i8 field in v0 and the i32 field in v1, with
; the dword store of v1 emitted before the byte store of v0; on GFX11 the pair
; is wrapped in s_clause 0x1.
define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_struct_i8_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_struct_i8_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store { i8, i32 } %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a byval { i8, i32 } through an addrspace(5) pointer, loads it, and
; stores the value to a poison addrspace(1) pointer. The checks read the byte
; at s32 offset 0 and the dword at s32 offset 4. The GFX11 true16 and fake16
; outputs are checked separately: they differ in the byte-load opcode, in
; load order, and in how the waits and the store clause are placed.
define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_ubyte v1, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: s_clause 0x1
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, s32
|
|
; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s32 offset:4
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: s_clause 0x1
|
|
; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], 0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: s_clause 0x1
|
|
; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v1, off, s32
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
|
|
store { i8, i32 } %arg0.load, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes two byval { i8, i32 } arguments through addrspace(5) pointers plus an
; i32, and uses volatile loads and stores throughout. The checks read the
; first struct at s32 offsets 0 and 4 and the second at offsets 8 and 12,
; store both to a poison addrspace(1) pointer, and write the i32 to a poison
; addrspace(3) pointer with a DS write. Every volatile access is followed by
; its own wait. The CI and VI outputs set m0 to -1 before the DS write; the
; GFX9 and GFX11 outputs do not.
define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
|
|
; CI-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v1, off, s32 glc dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v2, off, s32 offset:8 glc dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v1, off, s32 glc dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0
|
|
%arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1
|
|
store volatile { i8, i32 } %arg0.load, ptr addrspace(1) poison
|
|
store volatile { i8, i32 } %arg1.load, ptr addrspace(1) poison
|
|
store volatile i32 %arg2, ptr addrspace(3) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes a byval i32 and a byval i64 through addrspace(5) pointers, loads both,
; and stores them to a poison addrspace(1) pointer. The checks read the i32 at
; s32 offset 0 and the i64 at s32 offset 8: as two dword loads (offsets 8 and
; 12) on CI/GFX8/GFX9 and as one 64-bit scratch load on GFX11.
define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
|
|
; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v2, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(2)
|
|
; CIGFX89-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_i32_byval_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: scratch_load_b32 v2, off, s32
|
|
; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load i32, ptr addrspace(5) %arg0
|
|
%arg1.load = load i64, ptr addrspace(5) %arg1
|
|
store i32 %arg0.load, ptr addrspace(1) poison
|
|
store i64 %arg1.load, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Takes (<32 x i32>, i32, i64) and writes each argument with a volatile store
; to a poison addrspace(1) pointer. The checks load v31 from s32 offset 0, the
; i32 from offset 4 (into v34), and the i64 from offsets 8 and 12 (into
; v[32:33]). The vector is written as eight 128-bit stores in descending
; register order, each followed by a wait, then the i32 and the i64.
define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_i32_i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(3)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_i32_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile i32 %arg1, ptr addrspace(1) poison
|
|
store volatile i64 %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
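; FIXME: CI and VI use different extending-load types here (the CI checks below
; use dword loads for the i8/i16/half/bfloat stack arguments, while the GFX89
; checks use ushort loads)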
|
|
; A <32 x i32> argument followed by five small scalars (i1, i8, i16, half,
; bfloat); note that the function name does not mention the half parameter.
; Every argument is written back with a volatile store, so the checks pin how
; each one is fetched. The expected code reloads v31 from the stack at s32 and
; reads the trailing scalars from stack offsets 4 through 20. The i1 value is
; masked with 1 before its byte store in every check variant.
define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 {
|
|
; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(5)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v12, 1.0, v32
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v13, v33
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v0, 1, v34
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v12
|
|
; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v36, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v13, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX89-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:4
|
|
; GFX89-NEXT: buffer_load_ushort v33, off, s[0:3], s32 offset:8
|
|
; GFX89-NEXT: buffer_load_ushort v34, off, s[0:3], s32 offset:12
|
|
; GFX89-NEXT: buffer_load_ushort v35, off, s[0:3], s32 offset:16
|
|
; GFX89-NEXT: buffer_load_ushort v36, off, s[0:3], s32 offset:20
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: v_and_b32_e32 v0, 1, v32
|
|
; GFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_short v34, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_short v35, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_short v36, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: s_clause 0x5
|
|
; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-TRUE16-NEXT: scratch_load_u8 v36, off, s32 offset:4
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:8
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:12
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:16
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:20
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v36
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b16 v33, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: s_clause 0x5
|
|
; GFX11-FAKE16-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v32, off, s32 offset:4
|
|
; GFX11-FAKE16-NEXT: scratch_load_u16 v33, off, s32 offset:8
|
|
; GFX11-FAKE16-NEXT: scratch_load_u16 v34, off, s32 offset:12
|
|
; GFX11-FAKE16-NEXT: scratch_load_u16 v35, off, s32 offset:16
|
|
; GFX11-FAKE16-NEXT: scratch_load_u16 v36, off, s32 offset:20
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 1, v32
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile i1 %arg1, ptr addrspace(1) poison
|
|
store volatile i8 %arg2, ptr addrspace(1) poison
|
|
store volatile i16 %arg3, ptr addrspace(1) poison
|
|
store volatile half %arg4, ptr addrspace(1) poison
|
|
store volatile bfloat %arg5, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by two 2-element 32-bit vectors. The expected code
; reloads v31 from the stack at s32, gathers each trailing vector with two
; dword loads (stack offsets 4/8 and 12/16) into a consecutive register pair,
; and writes each pair back with one 64-bit store.
define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(4)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[34:35], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x4
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <2 x i32> %arg1, ptr addrspace(1) poison
|
|
store volatile <2 x float> %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by vectors of 16-bit elements (<2 x i16>, <2 x half>,
; <2 x bfloat>, <4 x bfloat>). The GFX89 and GFX11 checks fetch them as five
; dwords from stack offsets 4-20 and store them as three dwords plus one
; 64-bit store. The CI checks instead load ten dwords (offsets 4-40), run
; v_cvt_f16_f32 or v_mul_f32 + v_lshrrev_b32 on eight of them, and emit ten
; short stores.
define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 {
|
|
; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v10, v38
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v32
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v33
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v34
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v35
|
|
; CI-NEXT: v_mul_f32_e32 v8, 1.0, v36
|
|
; CI-NEXT: v_mul_f32_e32 v9, 1.0, v37
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v16, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v17, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v11, v20
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v5
|
|
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v6
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v7
|
|
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v8
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v9
|
|
; CI-NEXT: buffer_store_short v11, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v10, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v5, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v3, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
|
|
; GFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
|
|
; GFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4
|
|
; GFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8
|
|
; GFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dword v35, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dword v36, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x5
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v36, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <2 x i16> %arg1, ptr addrspace(1) poison
|
|
store volatile <2 x half> %arg2, ptr addrspace(1) poison
|
|
store volatile <2 x bfloat> %arg3, ptr addrspace(1) poison
|
|
store volatile <4 x bfloat> %arg4, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by two 128-bit vectors (<2 x i64>, <2 x double>).
; Each trailing vector is expected to be assembled from four dword stack loads
; (offsets 4-16 and 20-32) and written with a single 128-bit store. In the
; CI, VI and GFX9 checks v31 is loaded twice: first from s32 before the
; v[28:31] store, then, once that store has been issued, from offset 20. The
; GFX11 checks use v32 for offset 20 and group all nine loads under one s_clause.
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v2i64_v2f64:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v2i64_v2f64:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v2i64_v2f64:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x8
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <2 x i64> %arg1, ptr addrspace(1) poison
|
|
store volatile <2 x double> %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by <4 x i32> and <4 x float>. The checks expect v31 to
; be reloaded from the stack at s32, eight further dword loads from stack
; offsets 4-32 into v32-v39, and one 128-bit store per trailing vector
; (v[32:35], then v[36:39]).
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_v4i32_v4f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
|
|
; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
|
|
; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(8)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x8
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <4 x i32> %arg1, ptr addrspace(1) poison
|
|
store volatile <4 x float> %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by two 8-element vectors (48 argument dwords in total).
; In the checks below, v31 is loaded from the stack at s32 (offset 0) before
; v[28:31] is stored, and the 16 dwords of %arg1 and %arg2 are loaded from
; stack offsets 4 through 64.
; NOTE(review): the volatile stores presumably exist to keep every argument
; element live in the generated code - confirm against the rest of the test.
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x10
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <8 x i32> %arg1, ptr addrspace(1) poison
|
|
store volatile <8 x float> %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by two 16-element vectors (64 argument dwords in total).
; In the checks below, v31 is loaded from the stack at s32 (offset 0) before
; v[28:31] is stored, and the 32 dwords of %arg1 and %arg2 are loaded from
; stack offsets 4 through 128.
; NOTE(review): the volatile stores presumably exist to keep every argument
; element live in the generated code - confirm against the rest of the test.
define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112
|
|
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100
|
|
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128
|
|
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124
|
|
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116
|
|
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80
|
|
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76
|
|
; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72
|
|
; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96
|
|
; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92
|
|
; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84
|
|
; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
|
|
; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112
|
|
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108
|
|
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100
|
|
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128
|
|
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124
|
|
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116
|
|
; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80
|
|
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76
|
|
; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72
|
|
; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96
|
|
; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92
|
|
; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88
|
|
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84
|
|
; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
|
|
; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112
|
|
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108
|
|
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100
|
|
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128
|
|
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124
|
|
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116
|
|
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80
|
|
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76
|
|
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72
|
|
; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96
|
|
; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92
|
|
; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88
|
|
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84
|
|
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1f
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120
|
|
; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36
|
|
; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <16 x i32> %arg1, ptr addrspace(1) poison
|
|
store volatile <16 x float> %arg2, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Make sure register v3 isn't wasted when 3-element vector types are promoted to 4-element vectors: %arg1 should be passed in v3.
|
|
; <3 x float> argument followed by an i32. The checks below expect the four
; ds stores to use v0, v1, v2 and v3 as their data operands in that order,
; i.e. the three elements of %arg0 in v0-v2 and %arg1 in v3.
define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
|
|
; CI-LABEL: void_func_v3f32_wasted_reg:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: ds_write_b32 v0, v1
|
|
; CI-NEXT: ds_write_b32 v0, v2
|
|
; CI-NEXT: ds_write_b32 v0, v3
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v3f32_wasted_reg:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: ds_write_b32 v0, v1
|
|
; VI-NEXT: ds_write_b32 v0, v2
|
|
; VI-NEXT: ds_write_b32 v0, v3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v3f32_wasted_reg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: ds_write_b32 v0, v1
|
|
; GFX9-NEXT: ds_write_b32 v0, v2
|
|
; GFX9-NEXT: ds_write_b32 v0, v3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f32_wasted_reg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-NEXT: ds_store_b32 v0, v2
|
|
; GFX11-NEXT: ds_store_b32 v0, v3
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.0 = extractelement <3 x float> %arg0, i32 0
|
|
%arg0.1 = extractelement <3 x float> %arg0, i32 1
|
|
%arg0.2 = extractelement <3 x float> %arg0, i32 2
|
|
store volatile float %arg0.0, ptr addrspace(3) poison
|
|
store volatile float %arg0.1, ptr addrspace(3) poison
|
|
store volatile float %arg0.2, ptr addrspace(3) poison
|
|
store volatile i32 %arg1, ptr addrspace(3) poison
|
|
ret void
|
|
}
|
|
|
|
; <3 x i32> argument followed by an i32. The checks below expect the four
; ds stores to use v0, v1, v2 and v3 as their data operands in that order,
; i.e. the three elements of %arg0 in v0-v2 and %arg1 in v3.
define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
|
|
; CI-LABEL: void_func_v3i32_wasted_reg:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: ds_write_b32 v0, v1
|
|
; CI-NEXT: ds_write_b32 v0, v2
|
|
; CI-NEXT: ds_write_b32 v0, v3
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v3i32_wasted_reg:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: ds_write_b32 v0, v1
|
|
; VI-NEXT: ds_write_b32 v0, v2
|
|
; VI-NEXT: ds_write_b32 v0, v3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v3i32_wasted_reg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: ds_write_b32 v0, v1
|
|
; GFX9-NEXT: ds_write_b32 v0, v2
|
|
; GFX9-NEXT: ds_write_b32 v0, v3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i32_wasted_reg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-NEXT: ds_store_b32 v0, v2
|
|
; GFX11-NEXT: ds_store_b32 v0, v3
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.0 = extractelement <3 x i32> %arg0, i32 0
|
|
%arg0.1 = extractelement <3 x i32> %arg0, i32 1
|
|
%arg0.2 = extractelement <3 x i32> %arg0, i32 2
|
|
store volatile i32 %arg0.0, ptr addrspace(3) poison
|
|
store volatile i32 %arg0.1, ptr addrspace(3) poison
|
|
store volatile i32 %arg0.2, ptr addrspace(3) poison
|
|
store volatile i32 %arg1, ptr addrspace(3) poison
|
|
ret void
|
|
}
|
|
|
|
; Check there is no crash.
|
|
; Stores a <16 x i8> argument with one volatile store to address space 1.
; The expected output performs sixteen one-byte stores, from v15 down to v0,
; each followed by a wait instruction.
define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_volatile_v16i8:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v5, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_volatile_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v14, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <16 x i8> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Check there is no crash.
|
|
; Takes a <32 x i32> followed by a <16 x i8> and stores both with volatile
; stores to address space 1. In the expected output v31 and the sixteen
; byte values are loaded relative to s32 (offset 0 and offsets 4 through 64)
; before they are stored.
define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
|
|
; CI-LABEL: void_func_v32i32_v16i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v36, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v38, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v37, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v16i8:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v36, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v38, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v37, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v16i8:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v36, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v38, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v37, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-TRUE16-LABEL: void_func_v32i32_v16i8:
|
|
; GFX11-TRUE16: ; %bb.0:
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-TRUE16-NEXT: s_clause 0x10
|
|
; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v32, off, s32 offset:64
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v33, off, s32 offset:60
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v34, off, s32 offset:56
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v35, off, s32 offset:52
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v36, off, s32 offset:48
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v37, off, s32 offset:44
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v38, off, s32 offset:40
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v39, off, s32 offset:36
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v48, off, s32 offset:32
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v49, off, s32 offset:28
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v50, off, s32 offset:24
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v51, off, s32 offset:20
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v52, off, s32 offset:16
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v53, off, s32 offset:12
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v54, off, s32 offset:8
|
|
; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v55, off, s32 offset:4
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-TRUE16-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc
|
|
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-FAKE16-LABEL: void_func_v32i32_v16i8:
|
|
; GFX11-FAKE16: ; %bb.0:
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-FAKE16-NEXT: s_clause 0x10
|
|
; GFX11-FAKE16-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v32, off, s32 offset:64
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v33, off, s32 offset:60
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v34, off, s32 offset:56
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v35, off, s32 offset:52
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v36, off, s32 offset:48
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v37, off, s32 offset:44
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v38, off, s32 offset:40
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v39, off, s32 offset:36
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v48, off, s32 offset:32
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v49, off, s32 offset:28
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v50, off, s32 offset:24
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v51, off, s32 offset:20
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v52, off, s32 offset:16
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v53, off, s32 offset:12
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v54, off, s32 offset:8
|
|
; GFX11-FAKE16-NEXT: scratch_load_u8 v55, off, s32 offset:4
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(16)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-FAKE16-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc
|
|
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) poison
|
|
store volatile <16 x i8> %arg1, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
|
|
; Stores a scalar bfloat argument to address space 1.
; The CI checks expect a multiply by 1.0 and a right shift by 16 before a
; short store, while the other check prefixes store v0 directly.
define void @void_func_bf16(bfloat %arg0) #0 {
|
|
; CI-LABEL: void_func_bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store bfloat %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x bfloat> argument to address space 1.
; The CI checks expect v0 and v1 to be combined into one dword with
; v_alignbit_b32 before the store, while the other check prefixes store v0
; directly as a dword.
define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x bfloat> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x bfloat> argument to address space 1.
; Every check prefix expects two stores, a 16-bit store of v1 followed by a
; 32-bit store of v0. The CI checks additionally expect the inputs v0-v2 to
; be converted and packed into those two registers first.
define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x bfloat> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x bfloat> argument to address space 1.
; The CI checks expect the inputs v0-v3 to be packed pairwise into v[1:2]
; before a 64-bit store, while the other check prefixes store v[0:1]
; directly.
define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x bfloat> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x bfloat> argument to address space 1.
; The CI checks expect the inputs v0-v7 to be packed pairwise into v[3:6]
; before a 128-bit store, while the other check prefixes store v[0:3]
; directly.
define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
|
|
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
|
|
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x bfloat> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Codegen test case: stores the <16 x bfloat> argument %arg0 to a poison
; addrspace(1) pointer and returns.
; The CI / GFX89 / GFX11 comment lines inside the body are FileCheck
; expectations for the emitted instructions under each check prefix. Every
; prefix expects two 4-dword (128-bit) buffer stores; the GFX11 expectations
; place an s_clause 0x1 directly before the pair, and the CI expectations
; additionally list v_mul_f32 / v_lshrrev_b32 / v_alignbit_b32 on the argument
; registers before the stores.
; NOTE(review): the expectation lines look machine-generated - presumably they
; should be regenerated with the test-update script rather than edited by hand;
; confirm against the header of this file.
define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
|
|
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v15
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v14
|
|
; CI-NEXT: v_alignbit_b32 v14, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v13
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v12
|
|
; CI-NEXT: v_alignbit_b32 v13, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v11
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v10
|
|
; CI-NEXT: v_alignbit_b32 v12, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v9
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v8
|
|
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
|
|
; CI-NEXT: v_alignbit_b32 v11, v0, v1, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
|
|
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x bfloat> %arg0, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced as `#0` by the function definitions above; it
; contains only `nounwind`.
attributes #0 = { nounwind }
|