
Similar to InstCombinerImpl::freezeOtherUses, attempt to ensure that we merge multiple frozen/unfrozen uses of an SDValue. This fixes a number of hasOneUse() problems when trying to push FREEZE nodes through the DAG. Remove SimplifyMultipleUseDemandedBits handling of FREEZE nodes, as we now want to keep the common node and not bypass it for some nodes just because of DemandedElts. Fixes #149799
14774 lines
702 KiB
LLVM
14774 lines
702 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s
|
|
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
|
|
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
|
|
|
|
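; FIXME: the gfx900 SelectionDAG invocation below reuses the GFX8-SDAG check prefix (GFX9-SDAG looks intended, matching the other generations); switching it requires regenerating the assertions with utils/update_llc_test_checks.py.
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX8-SDAG %s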
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
|
|
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
|
|
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
|
|
; FIXME-TRUE16 enable gisel
|
|
; XUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
|
|
|
|
; Loads a <2 x i32> from %ptra, freezes it and stores the frozen value to %ptrb
; (both pointers are addrspace(1); the load and store use align 4).
; The check lines in this function are autogenerated expected llc output;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2i32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <2 x i32> %a
|
|
store <2 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; Loads a <3 x i32> from %ptra, freezes it and stores the frozen value to %ptrb
; (both pointers are addrspace(1); the load and store use align 4).
; The check lines in this function are autogenerated expected llc output;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 offset:8
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:8
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:8
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v3i32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx3 v[4:6], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx3 v[2:3], v[4:6]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v3i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx3 v[2:3], v[4:6], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v3i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx3 v[2:3], v[4:6], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v3i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b96 v[4:6], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b96 v[2:3], v[4:6], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <3 x i32> %a
|
|
store <3 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; Loads a <4 x i32> from %ptra, freezes it and stores the frozen value to %ptrb
; (both pointers are addrspace(1); the load and store use align 4).
; The check lines in this function are autogenerated expected llc output;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v4i32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v4i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v4i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v4i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <4 x i32> %a
|
|
store <4 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; Loads a <5 x i32> from %ptra, freezes it and stores the frozen value to %ptrb
; (both pointers are addrspace(1); the load and store use align 4).
; The check lines in this function are autogenerated expected llc output;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): this function has no check block for the gfx803 or gfx900
; SelectionDAG runs (only the GlobalISel variants appear for those targets).
; Presumably both SelectionDAG invocations share one check prefix while their
; output differs, so the generator dropped it - TODO confirm and regenerate.
define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v5i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v5i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v5i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v5i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v5i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dword v8, v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v8
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v5i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v5i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dword v8, v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v8, off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v5i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v5i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b32 v8, v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v8, off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v5i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <5 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <5 x i32> %a
|
|
store <5 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; Loads a <6 x i32> from %ptra, freezes it and stores the frozen value to %ptrb
; (both pointers are addrspace(1); the load and store use align 4).
; The check lines in this function are autogenerated expected llc output;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): this function has no check block for the gfx803 or gfx900
; SelectionDAG runs (only the GlobalISel variants appear for those targets).
; Presumably both SelectionDAG invocations share one check prefix while their
; output differs, so the generator dropped it - TODO confirm and regenerate.
define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v6i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v6i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v6i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v6i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v6i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v6i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v6i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v6i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v6i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v6i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <6 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <6 x i32> %a
|
|
store <6 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v7i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v10, v[0:1], s[4:7], 0 addr64 offset:24
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v10, v[2:3], s[4:7], 0 addr64 offset:24
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v7i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:24
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:24
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v7i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v7i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v7i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx3 v[8:10], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[8:10]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v7i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v7i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v7i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v7i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v7i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <7 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <7 x i32> %a
|
|
store <7 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_v8i32: load a <8 x i32> from %ptra (addrspace(1), align 4), freeze
; it, and store the frozen value to %ptrb (addrspace(1), align 4).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): this function has no assertions for the gfx803 and gfx900
; SelectionDAG runs - probably because the gfx900 SelectionDAG RUN line reuses
; the gfx803 SelectionDAG check prefix, so the conflicting output is dropped
; by the update script. Confirm against the RUN lines at the top of the file.
define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v8i32:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v8i32:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v8i32:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v8i32:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v8i32:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v8i32:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v8i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_clause 0x1
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v8i32:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v8i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v8i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %a = load <8 x i32>, ptr addrspace(1) %ptra, align 4
  %freeze = freeze <8 x i32> %a
  store <8 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
  ret void
}
|
|
|
|
; freeze_v9i32: load a <9 x i32> from %ptra (addrspace(1), align 4), freeze
; it, and store the frozen value to %ptrb (addrspace(1), align 4).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): this function has no assertions for the gfx803 and gfx900
; SelectionDAG runs - probably because the gfx900 SelectionDAG RUN line reuses
; the gfx803 SelectionDAG check prefix, so the conflicting output is dropped
; by the update script. Confirm against the RUN lines at the top of the file.
define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v9i32:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX6-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v9i32:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v9i32:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX7-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v9i32:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v9i32:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dword v14, v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX8-GISEL-NEXT: flat_store_dword v[12:13], v14
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v9i32:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX9-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v9i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_clause 0x2
; GFX10-SDAG-NEXT: global_load_dword v12, v[0:1], off offset:32
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v12, off offset:32
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v9i32:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x2
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v9i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_clause 0x2
; GFX11-SDAG-NEXT: global_load_b32 v12, v[0:1], off offset:32
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v12, off offset:32
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v9i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x2
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:32
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:32
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %a = load <9 x i32>, ptr addrspace(1) %ptra, align 4
  %freeze = freeze <9 x i32> %a
  store <9 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
  ret void
}
|
|
|
|
; freeze_v10i32: load a <10 x i32> from %ptra (addrspace(1), align 4), freeze
; it, and store the frozen value to %ptrb (addrspace(1), align 4).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): this function has no assertions for the gfx803 SelectionDAG
; run - probably because the gfx900 SelectionDAG RUN line reuses the gfx803
; SelectionDAG check prefix, so the conflicting output is dropped by the
; update script. Confirm against the RUN lines at the top of the file.
define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v10i32:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v10i32:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v10i32:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v10i32:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v10i32:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 32, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[14:15], v[0:1]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: freeze_v10i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: freeze_v10i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32
; GFX10-NEXT: s_waitcnt vmcnt(2)
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: freeze_v10i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:32
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %a = load <10 x i32>, ptr addrspace(1) %ptra, align 4
  %freeze = freeze <10 x i32> %a
  store <10 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
  ret void
}
|
|
|
|
define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v11i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v14, v[0:1], s[4:7], 0 addr64 offset:40
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v14, v[2:3], s[4:7], 0 addr64 offset:40
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v11i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:40
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:40
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v11i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v11i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v11i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx3 v[12:14], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx3 v[15:16], v[12:14]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v11i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v11i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x2
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32
|
|
; GFX10-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v11i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x2
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-NEXT: global_load_b96 v[12:14], v[0:1], off offset:32
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b96 v[2:3], v[12:14], off offset:32
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <11 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <11 x i32> %a
|
|
store <11 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_v12i32: loads a <12 x i32> from %ptra (addrspace(1), align 4),
; freezes it, and stores the frozen value to %ptrb (addrspace(1), align 4).
; In every checked configuration below the copy is emitted as three 16-byte
; loads (dwordx4 / b128) followed by three matching 16-byte stores; no extra
; instructions are emitted for the freeze itself.
; NOTE(review): this function has GFX8-GISEL checks but no checks for the
; gfx803 SelectionDAG run. The gfx900 SelectionDAG run line at the top of the
; file lists the GFX8-SDAG prefix, which looks like a typo for GFX9-SDAG and
; would make the generator drop that shared prefix - confirm, then regenerate
; with utils/update_llc_test_checks.py rather than editing checks by hand.
define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v12i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v12i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v12i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v12i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v12i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v12i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v12i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x2
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v12i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x2
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <12 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <12 x i32> %a
|
|
store <12 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
; freeze_v13i32: loads a <13 x i32> from %ptra (addrspace(1), align 4),
; freezes it, and stores the frozen value to %ptrb (addrspace(1), align 4).
; In every checked configuration below the copy is emitted as three 16-byte
; accesses (dwordx4 / b128) plus one 4-byte access (dword / b32) at offset 48.
; The SelectionDAG and GlobalISel outputs differ only in the order of the
; memory operations and in register assignment.
; NOTE(review): there are no checks here for the gfx803 or gfx900
; SelectionDAG runs (only GFX8-GISEL and GFX9-GISEL appear). The gfx900
; SelectionDAG run line at the top of the file lists the GFX8-SDAG prefix,
; which looks like a typo for GFX9-SDAG and would make the generator drop that
; shared prefix - confirm, then regenerate with
; utils/update_llc_test_checks.py rather than editing checks by hand.
define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v13i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v13i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v13i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v13i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v13i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dword v18, v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v18
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v13i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v13i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x3
|
|
; GFX10-SDAG-NEXT: global_load_dword v16, v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v16, off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v13i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x3
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v13i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: global_load_b32 v16, v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v16, off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v13i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <13 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <13 x i32> %a
|
|
store <13 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_v14i32: loads a <14 x i32> from %ptra (addrspace(1), align 4),
; freezes it, and stores the frozen value to %ptrb (addrspace(1), align 4).
; In every checked configuration below the copy is emitted as three 16-byte
; accesses (dwordx4 / b128) plus one 8-byte access (dwordx2 / b64) at
; offset 48. The SelectionDAG and GlobalISel outputs differ only in the order
; of the memory operations and in register assignment.
; NOTE(review): there are no checks here for the gfx803 or gfx900
; SelectionDAG runs (only GFX8-GISEL and GFX9-GISEL appear). The gfx900
; SelectionDAG run line at the top of the file lists the GFX8-SDAG prefix,
; which looks like a typo for GFX9-SDAG and would make the generator drop that
; shared prefix - confirm, then regenerate with
; utils/update_llc_test_checks.py rather than editing checks by hand.
define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v14i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v14i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v14i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v14i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v14i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v14i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v14i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x3
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v14i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x3
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v14i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[16:17], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[16:17], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v14i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <14 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <14 x i32> %a
|
|
store <14 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_v15i32: load a <15 x i32> from %ptra (addrspace(1), align 4), freeze
; it, and store the frozen value to %ptrb (addrspace(1), align 4).
;
; The per-target checks below contain only memory loads/stores, the address or
; buffer-descriptor setup they need, s_waitcnt/s_clause/s_nop bookkeeping and
; the return; no instruction is expected for the freeze itself.
; The 60-byte vector is accessed as three 16-byte pieces plus a 12-byte tail
; (dwordx3 / b96), except in the GFX6 SDAG, GFX6 GlobalISel and GFX7 SDAG
; checks, where the tail is split further into a dwordx2 and a dword access.
;
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note at the top of the file); regenerate them rather than editing by hand.
; NOTE(review): this function has no GFX8 SDAG or GFX9 SDAG check blocks. The
; gfx900 SDAG RUN line uses the GFX8 SDAG prefix, which presumably collides
; with the gfx803 SDAG run -- confirm whether a GFX9 SDAG prefix was intended.
define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v15i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v15i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:56
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:56
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v15i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v15i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[16:18], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[16:18], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v15i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx3 v[16:18], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[16:18]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v15i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v15i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x3
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v15i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x3
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v15i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v15i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <15 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <15 x i32> %a
|
|
store <15 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_v16i32: load a <16 x i32> from %ptra (addrspace(1), align 4), freeze
; it, and store the frozen value to %ptrb (addrspace(1), align 4).
;
; The per-target checks below contain only memory loads/stores, the address or
; buffer-descriptor setup they need, s_waitcnt/s_clause/s_nop bookkeeping and
; the return; no instruction is expected for the freeze itself.
; In every check block the 64-byte vector is accessed as four 16-byte pieces
; (dwordx4 / b128) at offsets 0, 16, 32 and 48.
;
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note at the top of the file); regenerate them rather than editing by hand.
; NOTE(review): this function has no GFX8 SDAG or GFX9 SDAG check blocks. The
; gfx900 SDAG RUN line uses the GFX8 SDAG prefix, which presumably collides
; with the gfx803 SDAG run -- confirm whether a GFX9 SDAG prefix was intended.
define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v16i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v16i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x3
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v16i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x3
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <16 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <16 x i32> %a
|
|
store <16 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v17i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v17i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v17i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v17i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v17i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dword v20, v[18:19]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v20
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v17i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v17i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x4
|
|
; GFX10-SDAG-NEXT: global_load_dword v20, v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v20, off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v17i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x4
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v17i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x4
|
|
; GFX11-SDAG-NEXT: global_load_b32 v20, v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v20, off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v17i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x4
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:64
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <17 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <17 x i32> %a
|
|
store <17 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v18i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v18i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v18i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v18i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v18i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[18:19]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[20:21], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19]
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v18i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v18i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x4
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v18i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x4
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v18i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x4
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[20:21], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[20:21], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v18i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x4
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <18 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <18 x i32> %a
|
|
store <18 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v19i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v19i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v19i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v19i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[20:22], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[20:22], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v19i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx3 v[20:22], v[20:21]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx3 v[6:7], v[20:22]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v19i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v19i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x4
|
|
; GFX10-SDAG-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v19i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x4
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v19i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x4
|
|
; GFX11-SDAG-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v19i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x4
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <19 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <19 x i32> %a
|
|
store <19 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v20i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v20i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v20i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v20i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v20i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[20:23]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v20i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v20i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x4
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v20i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x4
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v20i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x4
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v20i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x4
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <20 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <20 x i32> %a
|
|
store <20 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v21i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v21i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v21i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v21i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v21i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dword v26, v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2
|
|
; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
|
|
; GFX8-GISEL-NEXT: flat_store_dword v[6:7], v26
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v21i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v21i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x5
|
|
; GFX10-SDAG-NEXT: global_load_dword v24, v[0:1], off offset:80
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v24, off offset:80
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v21i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x5
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v21i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x5
|
|
; GFX11-SDAG-NEXT: global_load_b32 v24, v[0:1], off offset:80
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v24, off offset:80
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v21i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x5
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:80
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <21 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <21 x i32> %a
|
|
store <21 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v22i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v22i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v22i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v22i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v22i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[24:25], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v26, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v27, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[26:27], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2
|
|
; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[6:7], v[24:25]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v22i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v22i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x5
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v22i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x5
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v22i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x5
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[24:25], v[0:1], off offset:80
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[24:25], off offset:80
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v22i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x5
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <22 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <22 x i32> %a
|
|
store <22 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v30i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v30i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v30i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v30i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v30i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v34, 0x50
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v34
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, v2, v34
|
|
; GFX8-GISEL-NEXT: v_add_u32_e64 v34, s[4:5], 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v33, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 0x60, v2
|
|
; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[34:35], v[20:23]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[24:27]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[28:31]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v30i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX9-GISEL-NEXT: s_nop 0
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v30i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x7
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v30i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x7
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v30i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x7
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[32:33], v[0:1], off offset:112
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[32:33], off offset:112
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v30i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x7
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:112
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:112
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <30 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <30 x i32> %a
|
|
store <30 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_v31i32: load a <31 x i32> from %ptra, freeze it, and store the frozen
; value to %ptrb (both addrspace(1) pointers, align 4).
;
; The check lines inside this function are the expected llc output, one block
; per check prefix. Per the note at the top of the file they are produced by
; utils/update_llc_test_checks.py, so regenerate them instead of editing them
; by hand.
;
; In every block the expected code is a plain copy: register/address setup,
; s_clause/s_waitcnt, loads followed by matching stores, and the s_setpc_b64
; return. The data is moved as seven 16-byte accesses (offsets 0 through 96)
; plus a 12-byte tail at offset 112. The tail is a single dwordx3/b96 access
; in most blocks, and a dwordx2 (offset 112) + dword (offset 120) pair in the
; GFX6 blocks and the GFX7 SDAG block.
;
; NOTE(review): there is no GFX8 SDAG or GFX9 SDAG check block for this
; function. The gfx900 SDAG invocation near the top of the file appears to use
; the GFX8-SDAG prefix rather than a GFX9-SDAG one, which would presumably make
; the generator drop the conflicting output for both targets -- confirm, and
; fix the prefix if so.
define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v31i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v31i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v31i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v31i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[32:34], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[32:34], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v31i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v35, 0x50
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v35
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx3 v[32:34], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v35
|
|
; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[32:34]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v31i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX9-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v31i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x7
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
|
|
; GFX10-SDAG-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v31i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x7
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX10-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v31i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x7
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
|
|
; GFX11-SDAG-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v31i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x7
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
|
|
; GFX11-GISEL-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <31 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <31 x i32> %a
|
|
store <31 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v32i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v32i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v32i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v32i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v32i32:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38
|
|
; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v32i32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v32i32:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x7
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v32i32:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x7
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v32i32:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x7
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v32i32:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x7
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <32 x i32>, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze <32 x i32> %a
|
|
store <32 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_i32: load an i32 from %ptra, freeze it, and store the result to %ptrb
; (both pointers are addrspace(1); the load and store are align 4).
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file); regenerate them rather than editing
; them by hand. Every expected sequence contains exactly one load into v0 and
; one store of v0, separated by s_waitcnt vmcnt(0). GFX6/GFX7 have separate
; -SDAG and -GISEL expectations, which differ only in how s4/s5 are initialised
; (two s_mov_b32 copies of s6 versus one s_mov_b64 of 0); GFX8 through GFX11
; use a single prefix each.
define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i32:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i32:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i32:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i32:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_i32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dword v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i32, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze i32 %a
|
|
store i32 %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_i64: load an i64 from %ptra, freeze it, and store the result to %ptrb
; (both pointers are addrspace(1); the load and store are align 4).
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file); regenerate them rather than editing
; them by hand. Every expected sequence contains exactly one 64-bit load into
; v[0:1] (dwordx2 / b64) and one store of v[0:1], separated by
; s_waitcnt vmcnt(0). GFX6/GFX7 have separate -SDAG and -GISEL expectations,
; which differ only in how s4/s5 are initialised (two s_mov_b32 copies of s6
; versus one s_mov_b64 of 0); GFX8 through GFX11 use a single prefix each.
define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i64:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i64:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i64:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i64:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_i64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_i64:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_i64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i64, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze i64 %a
|
|
store i64 %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_float: load a float from %ptra, freeze it, and store the result to
; %ptrb (both pointers are addrspace(1); the load and store are align 4).
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file); regenerate them rather than editing
; them by hand. Every expected sequence contains exactly one 32-bit load into
; v0 (dword / b32) and one store of v0, separated by s_waitcnt vmcnt(0).
; GFX6/GFX7 have separate -SDAG and -GISEL expectations, which differ only in
; how s4/s5 are initialised (two s_mov_b32 copies of s6 versus one s_mov_b64
; of 0); GFX8 through GFX11 use a single prefix each.
define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_float:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_float:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_float:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_float:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_float:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dword v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_float:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_float:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_float:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load float, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze float %a
|
|
store float %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
; freeze_i128: load an i128 from %ptra, freeze it, and store the result to
; %ptrb (both pointers are addrspace(1); the load and store are align 4).
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file); regenerate them rather than editing
; them by hand. Every expected sequence contains exactly one 128-bit load into
; v[4:7] (dwordx4 / b128) and one store of v[4:7], separated by
; s_waitcnt vmcnt(0). GFX6/GFX7 have separate -SDAG and -GISEL expectations,
; which differ only in how s4/s5 are initialised (two s_mov_b32 copies of s6
; versus one s_mov_b64 of 0); GFX8 through GFX11 use a single prefix each.
define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i128:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i128:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i128:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i128:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_i128:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_i128:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_i128:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_i128:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i128, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze i128 %a
|
|
store i128 %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i256:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i256:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i256:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i256:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_i256:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_i256:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_i256:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_i256:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_i256:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_i256:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i256, ptr addrspace(1) %ptra, align 4
|
|
%freeze = freeze i256 %a
|
|
store i256 %freeze, ptr addrspace(1) %ptrb, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_i16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_i16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_i16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_i16:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_i16:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_i16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i16, ptr addrspace(1) %ptra
|
|
%freeze = freeze i16 %a
|
|
store i16 %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2i16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2i16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2i16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2i16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2i16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dword v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2i16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2i16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x i16>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <2 x i16> %a
|
|
store <2 x i16> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3i16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3i16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3i16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3i16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3i16:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0
|
|
; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8
|
|
; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3i16:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3i16:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3i16:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v3i16:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3i16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x2
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x i16>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <3 x i16> %a
|
|
store <3 x i16> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4i16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4i16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4i16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4i16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v4i16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v4i16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v4i16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v4i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x i16>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <4 x i16> %a
|
|
store <4 x i16> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8i16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8i16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8i16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8i16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v8i16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v8i16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v8i16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v8i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x i16>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <8 x i16> %a
|
|
store <8 x i16> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16i16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16i16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16i16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16i16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16i16:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v16i16:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v16i16:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v16i16:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16i16:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16i16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <16 x i16>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <16 x i16> %a
|
|
store <16 x i16> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a scalar half that is loaded from %ptra and stored to %ptrb.
; Every checked configuration expects just a 16-bit load, a wait and a 16-bit
; store: the freeze itself is not expected to emit any instruction.
define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_f16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_f16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_f16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_f16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_f16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_f16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_f16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_f16:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_f16:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_f16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load half, ptr addrspace(1) %ptra
|
|
%freeze = freeze half %a
|
|
store half %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a <2 x half> that is loaded from %ptra and stored to %ptrb.
; Every checked configuration expects a single 32-bit load and a single 32-bit
; store, with no instruction emitted for the freeze.
define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2f16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2f16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2f16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2f16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2f16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dword v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2f16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2f16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x half>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <2 x half> %a
|
|
store <2 x half> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a <3 x half> that is loaded from %ptra and stored to %ptrb.
; All checked configurations load 64 bits at once. The SelectionDAG variants
; then store a 16-bit value at offset 4 plus a 32-bit value at offset 0, while
; the GlobalISel variants store three separate 16-bit values (offsets 0, 2, 4).
;
; NOTE(review): this function has no SelectionDAG checks for gfx803 or gfx900;
; only the GlobalISel variants appear for those two targets. The gfx900
; SelectionDAG run line at the top of the file passes GFX8-SDAG as a check
; prefix, which presumably makes the two SelectionDAG outputs conflict so the
; update script drops them. Confirm whether GFX9-SDAG was intended there and
; regenerate the assertions.
define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3f16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3f16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3f16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3f16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3f16:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0
|
|
; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8
|
|
; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3f16:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3f16:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3f16:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v3f16:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3f16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x2
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x half>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <3 x half> %a
|
|
store <3 x half> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a <4 x half> that is loaded from %ptra and stored to %ptrb.
; Every checked configuration expects a single 64-bit load and a single 64-bit
; store, with no instruction emitted for the freeze.
define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4f16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4f16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4f16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4f16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v4f16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v4f16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v4f16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v4f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x half>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <4 x half> %a
|
|
store <4 x half> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of an <8 x half> that is loaded from %ptra and stored to %ptrb.
; Every checked configuration expects a single 128-bit load and a single
; 128-bit store, with no instruction emitted for the freeze.
define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8f16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8f16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8f16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8f16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v8f16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v8f16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v8f16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v8f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x half>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <8 x half> %a
|
|
store <8 x half> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a <16 x half> that is loaded from %ptra and stored to %ptrb.
; The checked output is two 128-bit loads followed by two 128-bit stores. The
; SelectionDAG variants access offset 16 first, the GlobalISel variants access
; offset 0 first; on gfx803 GlobalISel the offset-16 addresses are formed with
; explicit add / add-with-carry pairs instead of an immediate offset.
;
; NOTE(review): this function has no SelectionDAG checks for gfx803 or gfx900;
; only the GlobalISel variants appear for those two targets. The gfx900
; SelectionDAG run line at the top of the file passes GFX8-SDAG as a check
; prefix, which presumably makes the two SelectionDAG outputs conflict so the
; update script drops them. Confirm whether GFX9-SDAG was intended there and
; regenerate the assertions.
define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16f16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16f16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16f16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16f16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16f16:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v16f16:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v16f16:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v16f16:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16f16:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16f16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <16 x half>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <16 x half> %a
|
|
store <16 x half> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a scalar bfloat that is loaded from %ptra and stored to %ptrb.
; Most checked configurations expect just a 16-bit load and a 16-bit store.
; The gfx600 and gfx700 SelectionDAG outputs are the exception: between the
; load and the store they shift the value left by 16, multiply it by 1.0 as an
; f32 and shift it right by 16 again (presumably a side effect of how bfloat is
; legalised through f32 on those targets; verify before relying on it).
define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_bf16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_bf16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_bf16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_bf16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_bf16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_bf16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_bf16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_bf16:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_bf16:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_bf16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load bfloat, ptr addrspace(1) %ptra
|
|
%freeze = freeze bfloat %a
|
|
store bfloat %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <2 x bfloat> from %ptra, freezes it, and stores the frozen value to
; %ptrb. In every check block below the only instruction expected between the
; single dword (b32 on GFX11) load and the matching store is an s_waitcnt.
define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2bf16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2bf16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2bf16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2bf16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2bf16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dword v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2bf16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2bf16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x bfloat>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <2 x bfloat> %a
|
|
store <2 x bfloat> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <3 x bfloat> from %ptra, freezes it, and stores the frozen value to
; %ptrb. The GFX6/GFX7 SDAG check blocks expect a v_mul_f32 by 1.0 on each
; element (with shift/and unpacking and v_alignbit repacking) between the load
; and the stores; none of the other check blocks below contain a v_mul_f32.
;
; NOTE(review): there are no check lines under the GFX8, GFX9, GFX8-SDAG or
; GFX9-SDAG prefixes for this function, so the SelectionDAG gfx803 and gfx900
; runs are not checked here. The gfx900 SelectionDAG RUN line at the top of the
; file passes GFX8-SDAG rather than GFX9-SDAG as its second prefix, which looks
; like a typo and is presumably why the shared prefix was dropped -- confirm,
; fix the RUN line, and regenerate with update_llc_test_checks.py.
define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3bf16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX6-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; GFX6-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4
|
|
; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; GFX6-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16
|
|
; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3bf16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3bf16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4
|
|
; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; GFX7-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16
|
|
; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3bf16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3bf16:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0
|
|
; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8
|
|
; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3bf16:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3bf16:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3bf16:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v3bf16:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3bf16:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x2
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x bfloat>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <3 x bfloat> %a
|
|
store <3 x bfloat> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <4 x bfloat> from %ptra, freezes it, and stores the frozen value to
; %ptrb. In every check block below the only instruction expected between the
; single dwordx2 (b64 on GFX11) load and the matching store is an s_waitcnt.
define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4bf16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4bf16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4bf16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4bf16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v4bf16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v4bf16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v4bf16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v4bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x bfloat>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <4 x bfloat> %a
|
|
store <4 x bfloat> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads an <8 x bfloat> from %ptra, freezes it, and stores the frozen value to
; %ptrb. In every check block below the only instruction expected between the
; single dwordx4 (b128 on GFX11) load and the matching store is an s_waitcnt.
define void @freeze_v8bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8bf16:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8bf16:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8bf16:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8bf16:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v8bf16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v8bf16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v8bf16:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v8bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x bfloat>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <8 x bfloat> %a
|
|
store <8 x bfloat> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a double from %ptra, freezes it, and stores the frozen value to %ptrb.
; In every check block below the only instruction expected between the single
; dwordx2 (b64 on GFX11) load and the matching store is an s_waitcnt.
define void @freeze_f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_f64:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_f64:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_f64:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_f64:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_f64:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load double, ptr addrspace(1) %ptra
|
|
%freeze = freeze double %a
|
|
store double %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <2 x double> from %ptra, freezes it, and stores the frozen value to
; %ptrb. In every check block below the only instruction expected between the
; single dwordx4 (b128 on GFX11) load and the matching store is an s_waitcnt.
define void @freeze_v2f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2f64:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2f64:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2f64:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2f64:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2f64:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2f64:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2f64:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x double>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <2 x double> %a
|
|
store <2 x double> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <3 x double> from %ptra, freezes it, and stores the frozen value to
; %ptrb. Every check block stores a dwordx4 at offset 0 and a dwordx2 at offset
; 16 (b128/b64 on GFX11). The SDAG blocks load a dwordx2 + dwordx4 pair, while
; the GlobalISel blocks load two dwordx4 and store only v[8:9] of the second.
;
; NOTE(review): there are no check lines under the GFX8, GFX9, GFX8-SDAG or
; GFX9-SDAG prefixes for this function, so the SelectionDAG gfx803 and gfx900
; runs are not checked here. The gfx900 SelectionDAG RUN line at the top of the
; file passes GFX8-SDAG rather than GFX9-SDAG as its second prefix, which looks
; like a typo and is presumably why the shared prefix was dropped -- confirm,
; fix the RUN line, and regenerate with update_llc_test_checks.py.
define void @freeze_v3f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3f64:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3f64:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3f64:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3f64:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3f64:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[8:9]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3f64:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3f64:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3f64:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v3f64:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3f64:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x double>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <3 x double> %a
|
|
store <3 x double> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <4 x double> from %ptra, freezes it, and stores the result to %ptrb.
; The check lines inside the body are autogenerated (see the NOTE at the top
; of the file); regenerate them with the update script rather than editing.
; NOTE(review): there are no GFX8-SDAG or GFX9-SDAG check blocks here. The
; gfx900 SDAG invocation is given the GFX8-SDAG prefix, so presumably the
; gfx803 and gfx900 SDAG outputs conflict and get dropped -- confirm whether
; a GFX9-SDAG prefix was intended.
define void @freeze_v4f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4f64:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4f64:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4f64:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4f64:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v4f64:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v4f64:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v4f64:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v4f64:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v4f64:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v4f64:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x double>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <4 x double> %a
|
|
store <4 x double> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <8 x double> from %ptra, freezes it, and stores the result to %ptrb.
; The check lines inside the body are autogenerated (see the NOTE at the top
; of the file); regenerate them with the update script rather than editing.
; NOTE(review): there are no GFX8-SDAG or GFX9-SDAG check blocks here. The
; gfx900 SDAG invocation is given the GFX8-SDAG prefix, so presumably the
; gfx803 and gfx900 SDAG outputs conflict and get dropped -- confirm whether
; a GFX9-SDAG prefix was intended.
define void @freeze_v8f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8f64:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8f64:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8f64:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8f64:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v8f64:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v8f64:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v8f64:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x3
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v8f64:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x3
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v8f64:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v8f64:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x double>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <8 x double> %a
|
|
store <8 x double> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a single ptr value from %ptra, freezes it, and stores the result to
; %ptrb. The check lines inside the body are autogenerated (see the NOTE at
; the top of the file); regenerate them with the update script rather than
; editing. For gfx8 through gfx11 the checks use the shared GFX8, GFX9, GFX10
; and GFX11 prefixes instead of separate SDAG and GISEL blocks.
define void @freeze_p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_p0:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_p0:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_p0:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_p0:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_p0:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_p0:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_p0:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_p0:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load ptr, ptr addrspace(1) %ptra
|
|
%freeze = freeze ptr %a
|
|
store ptr %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <2 x ptr> from %ptra, freezes it, and stores the result to %ptrb.
; The check lines inside the body are autogenerated (see the NOTE at the top
; of the file); regenerate them with the update script rather than editing.
; For gfx8 through gfx11 the checks use the shared GFX8, GFX9, GFX10 and GFX11
; prefixes instead of separate SDAG and GISEL blocks.
define void @freeze_v2p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2p0:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2p0:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2p0:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2p0:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2p0:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2p0:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2p0:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2p0:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x ptr>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <2 x ptr> %a
|
|
store <2 x ptr> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3p0:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3p0:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4
|
|
; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3p0:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3p0:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3p0:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3p0:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
|
|
; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3p0:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3p0:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
|
|
; GFX10-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v3p0:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3p0:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[6:9], off
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x ptr>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <3 x ptr> %a
|
|
store <3 x ptr> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Load a <4 x ptr> from %ptra, freeze it, and store the result to %ptrb.
; Every check block below expects a plain copy: two 128-bit loads and two
; 128-bit stores (plus address/descriptor setup and waitcnts), with no
; instruction attributable to the freeze itself.
; NOTE(review): there is no SDAG check block for gfx803 or gfx900 here. The
; gfx900 SDAG RUN line at the top of the file lists GFX8-SDAG rather than
; GFX9-SDAG, so presumably the two targets' differing output was dropped as a
; prefix conflict when the checks were generated -- confirm and regenerate.
define void @freeze_v4p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4p0:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4p0:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4p0:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4p0:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v4p0:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v4p0:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v4p0:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x1
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v4p0:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x1
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v4p0:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v4p0:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x ptr>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <4 x ptr> %a
|
|
store <4 x ptr> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Load an <8 x ptr> from %ptra, freeze it, and store the result to %ptrb.
; Every check block below expects a plain copy: four 128-bit loads and four
; 128-bit stores (plus address/descriptor setup and waitcnts), with no
; instruction attributable to the freeze itself.
; NOTE(review): there is no SDAG check block for gfx803 or gfx900 here. The
; gfx900 SDAG RUN line at the top of the file lists GFX8-SDAG rather than
; GFX9-SDAG, so presumably the two targets' differing output was dropped as a
; prefix conflict when the checks were generated -- confirm and regenerate.
define void @freeze_v8p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8p0:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8p0:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8p0:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8p0:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v8p0:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v8p0:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v8p0:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x3
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v8p0:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x3
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v8p0:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v8p0:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x ptr>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <8 x ptr> %a
|
|
store <8 x ptr> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v16p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16p0:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16p0:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16p0:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16p0:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16p0:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_nop 0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38
|
|
; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v16p0:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v16p0:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_clause 0x7
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v16p0:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_clause 0x7
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16p0:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x7
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16p0:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x7
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <16 x ptr>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <16 x ptr> %a
|
|
store <16 x ptr> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a single ptr addrspace(1) value: the pointer is loaded from %ptra,
; frozen, and the frozen value is stored to %ptrb.
; Every checked configuration expects one 64-bit load, a wait on vmcnt, and one
; 64-bit store. The GFX6/GFX7 check lines additionally expect the s_mov
; sequence that fills s[4:7], which the buffer_* instructions take as an
; operand.
define void @freeze_p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_p1:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_p1:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_p1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_p1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: freeze_p1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: freeze_p1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: freeze_p1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: freeze_p1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %a = load ptr addrspace(1), ptr addrspace(1) %ptra
  %freeze = freeze ptr addrspace(1) %a
  store ptr addrspace(1) %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Freeze of a <2 x ptr addrspace(1)> vector: the vector is loaded from %ptra,
; frozen, and the frozen value is stored to %ptrb.
; Every checked configuration expects one 128-bit load, a wait on vmcnt, and
; one 128-bit store. The GFX6/GFX7 check lines additionally expect the s_mov
; sequence that fills s[4:7], which the buffer_* instructions take as an
; operand.
define void @freeze_v2p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v2p1:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v2p1:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v2p1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v2p1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: freeze_v2p1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: freeze_v2p1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: freeze_v2p1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: freeze_v2p1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %a = load <2 x ptr addrspace(1)>, ptr addrspace(1) %ptra
  %freeze = freeze <2 x ptr addrspace(1)> %a
  store <2 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Freeze of a <3 x ptr addrspace(1)> vector: the vector is loaded from %ptra,
; frozen, and the frozen value is stored to %ptrb.
; The check lines under the -SDAG prefixes expect a 64-bit access at offset 16
; plus a 128-bit access at offset 0, for the load and again for the store. The
; check lines under the -GISEL prefixes expect two 128-bit loads, two register
; copies, and then one 128-bit store and one 64-bit store.
; NOTE(review): this function has no check lines for the -global-isel=0 runs on
; gfx803 and gfx900, only for the -global-isel=1 runs. The gfx900
; -global-isel=0 RUN line is given the GFX8-SDAG prefix, so presumably the two
; runs disagree under that shared prefix and the generator dropped it; confirm,
; and consider giving that RUN line a GFX9-SDAG prefix instead.
define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v3p1:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v3p1:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4
; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v3p1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v3p1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v3p1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v3p1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v3p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_clause 0x1
; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v3p1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
; GFX10-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v3p1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v3p1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
; GFX11-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[6:9], off
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %a = load <3 x ptr addrspace(1)>, ptr addrspace(1) %ptra
  %freeze = freeze <3 x ptr addrspace(1)> %a
  store <3 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Freeze of a <4 x ptr addrspace(1)> vector: the vector is loaded from %ptra,
; frozen, and the frozen value is stored to %ptrb.
; Every checked configuration expects two 128-bit loads (offsets 0 and 16)
; followed by two 128-bit stores to the matching offsets. The -SDAG check lines
; list the offset-16 access first and the -GISEL check lines list the offset-0
; access first.
; NOTE(review): this function has no check lines for the -global-isel=0 runs on
; gfx803 and gfx900, only for the -global-isel=1 runs. The gfx900
; -global-isel=0 RUN line is given the GFX8-SDAG prefix, so presumably the two
; runs disagree under that shared prefix and the generator dropped it; confirm,
; and consider giving that RUN line a GFX9-SDAG prefix instead.
define void @freeze_v4p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v4p1:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v4p1:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v4p1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v4p1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v4p1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v4p1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v4p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_clause 0x1
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v4p1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v4p1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v4p1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %a = load <4 x ptr addrspace(1)>, ptr addrspace(1) %ptra
  %freeze = freeze <4 x ptr addrspace(1)> %a
  store <4 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Round-trips a <8 x ptr addrspace(1)> value through freeze: load it from
; %ptra, freeze it, store it to %ptrb. In every checked configuration the
; expected output is four 128-bit loads followed by four 128-bit stores (plus
; address/descriptor setup and waitcnts), and each loaded register range is
; stored back unmodified.
; NOTE(review): there are no assertions under the GFX8-SDAG prefix here, and
; nothing covers gfx900 SelectionDAG. The gfx900 -global-isel=0 RUN line lists
; GFX8-SDAG (shared with gfx803) instead of a GFX9-SDAG prefix, which
; presumably makes the two outputs conflict so the update script drops them -
; confirm whether GFX9-SDAG was intended.
define void @freeze_v8p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v8p1:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v8p1:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v8p1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v8p1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v8p1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
; GFX8-GISEL-NEXT: s_nop 0
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v8p1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v8p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_clause 0x3
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v8p1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x3
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v8p1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_clause 0x3
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v8p1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x3
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  ; Source value: eight global-address-space pointers read from %ptra.
  %a = load <8 x ptr addrspace(1)>, ptr addrspace(1) %ptra
  ; The freeze is the only operation between the load and the store.
  %freeze = freeze <8 x ptr addrspace(1)> %a
  ; Write the frozen vector out so the value stays live.
  store <8 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Round-trips a <16 x ptr addrspace(1)> value through freeze: load it from
; %ptra, freeze it, store it to %ptrb. In every checked configuration the
; expected output is eight 128-bit loads followed by eight 128-bit stores
; (plus address/descriptor setup and waitcnts), and each loaded register range
; is stored back unmodified.
; NOTE(review): there are no assertions under the GFX8-SDAG prefix here, and
; nothing covers gfx900 SelectionDAG. The gfx900 -global-isel=0 RUN line lists
; GFX8-SDAG (shared with gfx803) instead of a GFX9-SDAG prefix, which
; presumably makes the two outputs conflict so the update script drops them -
; confirm whether GFX9-SDAG was intended.
define void @freeze_v16p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v16p1:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v16p1:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v16p1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v16p1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v16p1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50
; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38
; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1]
; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7]
; GFX8-GISEL-NEXT: s_nop 0
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38
; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v16p1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v16p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_clause 0x7
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112
; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64
; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80
; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48
; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off
; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v16p1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x7
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v16p1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_clause 0x7
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112
; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64
; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80
; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32
; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48
; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off
; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v16p1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x7
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  ; Source value: sixteen global-address-space pointers read from %ptra.
  %a = load <16 x ptr addrspace(1)>, ptr addrspace(1) %ptra
  ; The freeze is the only operation between the load and the store.
  %freeze = freeze <16 x ptr addrspace(1)> %a
  ; Write the frozen vector out so the value stays live.
  store <16 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Round-trips a single ptr addrspace(3) value through freeze: load it from
; %ptra, freeze it, store it to %ptrb. The expected output is one 32-bit DS
; read followed by one 32-bit DS write of the same register (GFX6 through GFX8
; additionally set m0 to -1 first).
; The assertions use the per-generation prefixes that the SelectionDAG and
; GlobalISel RUN lines share, so one check block covers both selectors.
define void @freeze_p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
; GFX6-LABEL: freeze_p3:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v1, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: freeze_p3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read_b32 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v1, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: freeze_p3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_read_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ds_write_b32 v1, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: freeze_p3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: freeze_p3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_b32 v0, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: freeze_p3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Source value: one addrspace(3) pointer read from %ptra.
  %a = load ptr addrspace(3), ptr addrspace(3) %ptra
  ; The freeze is the only operation between the load and the store.
  %freeze = freeze ptr addrspace(3) %a
  ; Write the frozen pointer out so the value stays live.
  store ptr addrspace(3) %freeze, ptr addrspace(3) %ptrb
  ret void
}
|
|
|
|
; Round-trips a `<2 x ptr addrspace(3)>` vector: load from %ptra, freeze,
; store to %ptrb. Every check block below expects one 64-bit ds load and one
; 64-bit ds store of the same register pair, with only waits in between (plus
; an m0 setup in the gfx6, gfx7 and gfx8 blocks).
define void @freeze_v2p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
|
|
; GFX6-LABEL: freeze_v2p3:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: freeze_v2p3:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v2p3:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_mov_b32 m0, -1
|
|
; GFX8-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2p3:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2p3:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2p3:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_load_b64 v[2:3], v0
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b64 v1, v[2:3]
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
  %a = load <2 x ptr addrspace(3)>, ptr addrspace(3) %ptra
|
|
  %freeze = freeze <2 x ptr addrspace(3)> %a
|
|
  store <2 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb
|
|
  ret void
|
|
}
|
|
|
|
; Round-trips a `<3 x ptr addrspace(3)>` vector: load from %ptra, freeze,
; store to %ptrb. The gfx6 check blocks (one per instruction selector) expect
; the access to be split into a 64-bit part at the base address and a 32-bit
; part at base + 8; the two selectors differ only in ordering and register
; choice. From gfx7 onwards a single 96-bit ds load/store pair is expected.
define void @freeze_v3p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3p3:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-SDAG-NEXT: ds_read_b32 v4, v2
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX6-SDAG-NEXT: ds_write_b32 v0, v4
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3p3:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: ds_read_b32 v0, v0
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX6-GISEL-NEXT: ds_write_b32 v1, v0
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: freeze_v3p3:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: ds_read_b96 v[2:4], v0
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b96 v1, v[2:4]
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v3p3:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_mov_b32 m0, -1
|
|
; GFX8-NEXT: ds_read_b96 v[2:4], v0
|
|
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-NEXT: ds_write_b96 v1, v[2:4]
|
|
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v3p3:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_read_b96 v[2:4], v0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b96 v1, v[2:4]
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v3p3:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: ds_read_b96 v[2:4], v0
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: ds_write_b96 v1, v[2:4]
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v3p3:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_load_b96 v[2:4], v0
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b96 v1, v[2:4]
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
  %a = load <3 x ptr addrspace(3)>, ptr addrspace(3) %ptra
|
|
  %freeze = freeze <3 x ptr addrspace(3)> %a
|
|
  store <3 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb
|
|
  ret void
|
|
}
|
|
|
|
; Round-trips a `<4 x ptr addrspace(3)>` vector: load from %ptra, freeze,
; store to %ptrb. The gfx6 check block expects two 64-bit ds loads and two
; 64-bit ds stores (the second of each at base + 8, computed with an explicit
; add). From gfx7 onwards a single 128-bit ds load/store pair is expected.
define void @freeze_v4p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
|
|
; GFX6-LABEL: freeze_v4p3:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v0
|
|
; GFX6-NEXT: ds_read_b64 v[4:5], v0
|
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX6-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX6-NEXT: ds_write_b64 v0, v[4:5]
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: freeze_v4p3:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_v4p3:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_mov_b32 m0, -1
|
|
; GFX8-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v4p3:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v4p3:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v4p3:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_load_b128 v[2:5], v0
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b128 v1, v[2:5]
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
  %a = load <4 x ptr addrspace(3)>, ptr addrspace(3) %ptra
|
|
  %freeze = freeze <4 x ptr addrspace(3)> %a
|
|
  store <4 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb
|
|
  ret void
|
|
}
|
|
|
|
; Round-trips an `<8 x ptr addrspace(3)>` vector: load from %ptra, freeze,
; store to %ptrb. The gfx6 check blocks expect four 64-bit ds loads and four
; 64-bit ds stores with explicitly added byte offsets 8, 16 and 24. The
; remaining check blocks expect two 128-bit ds loads and two 128-bit ds stores
; at offsets 0 and 16; the two instruction selectors differ only in the order
; in which the halves are accessed.
;
; NOTE(review): this function has no SelectionDAG check block for the gfx803
; and gfx900 runs. The gfx900 SelectionDAG RUN line at the top of the file
; lists the gfx8 SelectionDAG check prefix instead of a gfx9 one, so both runs
; share a prefix; presumably the check generator drops a shared prefix when the
; two outputs differ. Confirm, fix the RUN line and regenerate.
define void @freeze_v8p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8p3:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(2)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v1, v[6:7]
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[8:9]
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8p3:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v0
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9]
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8p3:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:16
|
|
; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9]
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8p3:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v8p3:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v8p3:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v8p3:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:16
|
|
; GFX10-SDAG-NEXT: ds_read_b128 v[6:9], v0
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX10-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX10-SDAG-NEXT: ds_write_b128 v1, v[6:9]
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v8p3:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX10-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX10-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX10-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v8p3:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: ds_load_b128 v[2:5], v0 offset:16
|
|
; GFX11-SDAG-NEXT: ds_load_b128 v[6:9], v0
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX11-SDAG-NEXT: ds_store_b128 v1, v[2:5] offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX11-SDAG-NEXT: ds_store_b128 v1, v[6:9]
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v8p3:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: ds_load_b128 v[2:5], v0
|
|
; GFX11-GISEL-NEXT: ds_load_b128 v[6:9], v0 offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX11-GISEL-NEXT: ds_store_b128 v1, v[2:5]
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(1)
|
|
; GFX11-GISEL-NEXT: ds_store_b128 v1, v[6:9] offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
  %a = load <8 x ptr addrspace(3)>, ptr addrspace(3) %ptra
|
|
  %freeze = freeze <8 x ptr addrspace(3)> %a
|
|
  store <8 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb
|
|
  ret void
|
|
}
|
|
|
|
; Round-trips a `<16 x ptr addrspace(3)>` vector: load from %ptra, freeze,
; store to %ptrb. The gfx6 check blocks expect eight 64-bit ds loads and eight
; 64-bit ds stores, each at an explicitly added byte offset (8 through 56).
; The remaining check blocks expect four 128-bit ds loads and four 128-bit ds
; stores at offsets 0, 16, 32 and 48; the two instruction selectors differ
; only in access order.
;
; NOTE(review): this function has no SelectionDAG check block for the gfx803
; and gfx900 runs. The gfx900 SelectionDAG RUN line at the top of the file
; lists the gfx8 SelectionDAG check prefix instead of a gfx9 one, so both runs
; share a prefix; presumably the check generator drops a shared prefix when the
; two outputs differ. Confirm, fix the RUN line and regenerate.
define void @freeze_v16p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16p3:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 24, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 16, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 32, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 56, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 48, v0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v6
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[10:11], v10
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[12:13], v12
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[14:15], v14
|
|
; GFX6-SDAG-NEXT: ds_read_b64 v[16:17], v16
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 48, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(4)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v1, v[8:9]
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(2)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[14:15]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 56, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(2)
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[16:17]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 32, v1
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[12:13]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 40, v1
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[10:11]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[6:7]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5]
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3]
|
|
; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16p3:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v8
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v0
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[10:11], v10
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[12:13], v12
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[14:15], v14
|
|
; GFX6-GISEL-NEXT: ds_read_b64 v[16:17], v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 32, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[10:11]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 40, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[12:13]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 48, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[14:15]
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v1
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3]
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(7)
|
|
; GFX6-GISEL-NEXT: ds_write_b64 v0, v[16:17]
|
|
; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16p3:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:32
|
|
; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0 offset:48
|
|
; GFX7-SDAG-NEXT: ds_read_b128 v[10:13], v0
|
|
; GFX7-SDAG-NEXT: ds_read_b128 v[14:17], v0 offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:32
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9] offset:48
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-SDAG-NEXT: ds_write_b128 v1, v[10:13]
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-SDAG-NEXT: ds_write_b128 v1, v[14:17] offset:16
|
|
; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16p3:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX7-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
|
|
; GFX7-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX7-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
|
|
; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16p3:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
|
|
; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX8-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
|
|
; GFX8-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX8-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX8-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
|
|
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v16p3:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX9-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
|
|
; GFX9-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX9-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX9-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
|
|
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v16p3:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:32
|
|
; GFX10-SDAG-NEXT: ds_read_b128 v[6:9], v0 offset:48
|
|
; GFX10-SDAG-NEXT: ds_read_b128 v[10:13], v0
|
|
; GFX10-SDAG-NEXT: ds_read_b128 v[14:17], v0 offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:32
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-SDAG-NEXT: ds_write_b128 v1, v[6:9] offset:48
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-SDAG-NEXT: ds_write_b128 v1, v[10:13]
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-SDAG-NEXT: ds_write_b128 v1, v[14:17] offset:16
|
|
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v16p3:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: ds_read_b128 v[2:5], v0
|
|
; GFX10-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
|
|
; GFX10-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
|
|
; GFX10-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-GISEL-NEXT: ds_write_b128 v1, v[2:5]
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX10-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
|
|
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16p3:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: ds_load_b128 v[2:5], v0 offset:32
|
|
; GFX11-SDAG-NEXT: ds_load_b128 v[6:9], v0 offset:48
|
|
; GFX11-SDAG-NEXT: ds_load_b128 v[10:13], v0
|
|
; GFX11-SDAG-NEXT: ds_load_b128 v[14:17], v0 offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-SDAG-NEXT: ds_store_b128 v1, v[2:5] offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-SDAG-NEXT: ds_store_b128 v1, v[6:9] offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-SDAG-NEXT: ds_store_b128 v1, v[10:13]
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-SDAG-NEXT: ds_store_b128 v1, v[14:17] offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16p3:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: ds_load_b128 v[2:5], v0
|
|
; GFX11-GISEL-NEXT: ds_load_b128 v[6:9], v0 offset:16
|
|
; GFX11-GISEL-NEXT: ds_load_b128 v[10:13], v0 offset:32
|
|
; GFX11-GISEL-NEXT: ds_load_b128 v[14:17], v0 offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-GISEL-NEXT: ds_store_b128 v1, v[2:5]
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-GISEL-NEXT: ds_store_b128 v1, v[6:9] offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-GISEL-NEXT: ds_store_b128 v1, v[10:13] offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3)
|
|
; GFX11-GISEL-NEXT: ds_store_b128 v1, v[14:17] offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
  %a = load <16 x ptr addrspace(3)>, ptr addrspace(3) %ptra
|
|
  %freeze = freeze <16 x ptr addrspace(3)> %a
|
|
  store <16 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb
|
|
  ret void
|
|
}
|
|
|
|
; Round-trips one `ptr addrspace(5)` value: load from %ptra, freeze, store to
; %ptrb. The gfx6 through gfx10 check blocks expect one `buffer_load_dword` and
; one `buffer_store_dword` using `offen` addressing; the gfx11 block expects a
; `scratch_load_b32` / `scratch_store_b32` pair instead. In every block the
; loaded register is stored directly, with only waits in between.
define void @freeze_p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
|
|
; GFX6-LABEL: freeze_p5:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-LABEL: freeze_p5:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_p5:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_p5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_p5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_p5:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v0, v0, off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_store_b32 v1, v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
  %a = load ptr addrspace(5), ptr addrspace(5) %ptra
|
|
  %freeze = freeze ptr addrspace(5) %a
|
|
  store ptr addrspace(5) %freeze, ptr addrspace(5) %ptrb
|
|
  ret void
|
|
}
|
|
|
|
define void @freeze_v2p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2p5:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2p5:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2p5:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2p5:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v2p5:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 4, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 4, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v2p5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v2p5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x1
|
|
; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v2p5:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b64 v[2:3], v0, off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_store_b64 v1, v[2:3], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x ptr addrspace(5)>, ptr addrspace(5) %ptra
|
|
%freeze = freeze <2 x ptr addrspace(5)> %a
|
|
store <2 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v3p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3p5:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3p5:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3p5:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3p5:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3p5:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 4, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 8, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v3p5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v3p5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x2
|
|
; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v3p5:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b96 v[2:4], v0, off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_store_b96 v1, v[2:4], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x ptr addrspace(5)>, ptr addrspace(5) %ptra
|
|
%freeze = freeze <3 x ptr addrspace(5)> %a
|
|
store <3 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v4p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4p5:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4p5:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4p5:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4p5:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v4p5:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 4, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 8, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 12, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v4p5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
|
|
; GFX9-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v4p5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x3
|
|
; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
|
|
; GFX10-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_v4p5:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b128 v[2:5], v0, off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_store_b128 v1, v[2:5], off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x ptr addrspace(5)>, ptr addrspace(5) %ptra
|
|
%freeze = freeze <4 x ptr addrspace(5)> %a
|
|
store <4 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v8p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8p5:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8p5:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8p5:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8p5:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v8p5:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 20, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 24, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 28, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 4, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 8, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 12, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 20, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 24, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 28, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v8p5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
|
|
; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16
|
|
; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20
|
|
; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24
|
|
; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v8p5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x7
|
|
; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
|
|
; GFX10-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16
|
|
; GFX10-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20
|
|
; GFX10-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24
|
|
; GFX10-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28
|
|
; GFX10-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; GFX10-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v8p5:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: scratch_load_b128 v[2:5], v0, off offset:16
|
|
; GFX11-SDAG-NEXT: scratch_load_b128 v[6:9], v0, off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[2:5], off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[6:9], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v8p5:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: scratch_load_b128 v[2:5], v0, off
|
|
; GFX11-GISEL-NEXT: scratch_load_b128 v[6:9], v0, off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[2:5], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[6:9], off offset:16
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x ptr addrspace(5)>, ptr addrspace(5) %ptra
|
|
%freeze = freeze <8 x ptr addrspace(5)> %a
|
|
store <8 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Test case freeze_v16p5 - loads a <16 x ptr addrspace(5)> vector from %ptra,
; applies freeze to it, and stores the frozen vector to %ptrb (both pointer
; arguments are in addrspace(5)).
; The check lines inside the body are the expected llc output for each check
; prefix. Per the note at the top of the file they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them rather than editing by hand.
; NOTE(review) - this function has a GFX8-GISEL check block but no SDAG check
; block for gfx803, and the gfx900 SDAG RUN line lists the GFX8-SDAG prefix.
; That looks like a prefix typo in the RUN line - confirm and regenerate.
define void @freeze_v16p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16p5:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 32, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 28, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v0
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v15, vcc, 20, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v15, v17, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v13, v18, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v12, v6, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1
|
|
; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16p5:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v4, v19, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 32, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 44, v1
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v6, v17, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v7, v3, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v8, v18, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v10, v4, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v13, v5, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1
|
|
; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16p5:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 32, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 28, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v0
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v15, vcc, 20, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v15, v17, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v13, v18, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v12, v6, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1
|
|
; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16p5:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v4, v19, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 32, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 44, v1
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v6, v17, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v7, v3, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v8, v18, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v10, v4, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v13, v5, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16p5:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 16, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 20, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 24, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 28, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 32, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 36, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 40, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 44, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 48, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 52, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 56, v0
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 60, v0
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 4, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 8, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 12, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 20, v1
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 24, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 28, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v4, v19, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 40, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 36, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 44, v1
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v6, v17, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v7, v3, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v8, v18, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v10, v4, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v13, v5, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v1
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 52, v1
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 56, v1
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 60, v1
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_v16p5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
|
|
; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16
|
|
; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20
|
|
; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24
|
|
; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28
|
|
; GFX9-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:32
|
|
; GFX9-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:36
|
|
; GFX9-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:40
|
|
; GFX9-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:44
|
|
; GFX9-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:48
|
|
; GFX9-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:52
|
|
; GFX9-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:56
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:32
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:36
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:40
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:44
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:48
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:52
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:56
|
|
; GFX9-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_v16p5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0xf
|
|
; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
|
|
; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
|
|
; GFX10-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16
|
|
; GFX10-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20
|
|
; GFX10-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24
|
|
; GFX10-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28
|
|
; GFX10-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:32
|
|
; GFX10-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:36
|
|
; GFX10-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:40
|
|
; GFX10-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:44
|
|
; GFX10-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:48
|
|
; GFX10-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:52
|
|
; GFX10-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:56
|
|
; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60
|
|
; GFX10-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
|
|
; GFX10-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; GFX10-NEXT: s_waitcnt vmcnt(13)
|
|
; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; GFX10-NEXT: s_waitcnt vmcnt(12)
|
|
; GFX10-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; GFX10-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX10-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(10)
|
|
; GFX10-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; GFX10-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX10-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; GFX10-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX10-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; GFX10-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX10-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:32
|
|
; GFX10-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX10-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:36
|
|
; GFX10-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX10-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:40
|
|
; GFX10-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX10-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:44
|
|
; GFX10-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX10-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:48
|
|
; GFX10-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX10-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:52
|
|
; GFX10-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX10-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:56
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16p5:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x3
|
|
; GFX11-SDAG-NEXT: scratch_load_b128 v[2:5], v0, off offset:32
|
|
; GFX11-SDAG-NEXT: scratch_load_b128 v[6:9], v0, off offset:48
|
|
; GFX11-SDAG-NEXT: scratch_load_b128 v[10:13], v0, off
|
|
; GFX11-SDAG-NEXT: scratch_load_b128 v[14:17], v0, off offset:16
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[2:5], off offset:32
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[6:9], off offset:48
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[10:13], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[14:17], off offset:16
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16p5:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: s_clause 0x3
|
|
; GFX11-GISEL-NEXT: scratch_load_b128 v[2:5], v0, off
|
|
; GFX11-GISEL-NEXT: scratch_load_b128 v[6:9], v0, off offset:16
|
|
; GFX11-GISEL-NEXT: scratch_load_b128 v[10:13], v0, off offset:32
|
|
; GFX11-GISEL-NEXT: scratch_load_b128 v[14:17], v0, off offset:48
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[2:5], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[6:9], off offset:16
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[10:13], off offset:32
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[14:17], off offset:48
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <16 x ptr addrspace(5)>, ptr addrspace(5) %ptra
|
|
%freeze = freeze <16 x ptr addrspace(5)> %a
|
|
store <16 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i8:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i8:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_i8:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: flat_store_byte v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_i8:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_i8:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_i8:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_i8:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_i8:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i8, ptr addrspace(1) %ptra
|
|
%freeze = freeze i8 %a
|
|
store i8 %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v2i8:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v2i8:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v2i8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v2i8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v2i8:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_ushort v0, v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v2i8:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v2i8:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v2i8:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_ushort v0, v[0:1], off
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xff
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_v2i8:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_v2i8:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v2i8:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <2 x i8>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <2 x i8> %a
|
|
store <2 x i8> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v3i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3i8:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3i8:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3i8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3i8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3i8:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dword v4, v[0:1]
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0xff
|
|
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 2, v2
|
|
; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v4
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
|
|
; GFX8-GISEL-NEXT: flat_store_short v[2:3], v4
|
|
; GFX8-GISEL-NEXT: flat_store_byte v[0:1], v5
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3i8:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
|
; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3i8:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2
|
|
; GFX10-SDAG-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3i8:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v4, v4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
|
; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v3i8:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: s_clause 0x1
|
|
; GFX11-SDAG-NEXT: global_store_d16_hi_b8 v[2:3], v0, off offset:2
|
|
; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3i8:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
|
; GFX11-GISEL-NEXT: s_clause 0x1
|
|
; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: global_store_d16_hi_b8 v[2:3], v0, off offset:2
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x i8>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <3 x i8> %a
|
|
store <3 x i8> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v4i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4i8:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4i8:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4i8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4i8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v4i8:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dword v0, v[0:1]
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
|
|
; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v0
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v4i8:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 8
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v4, v0, v1, v4
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v0, v4, v6, v0
|
|
; GFX9-GISEL-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v4i8:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v4i8:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dword v0, v[0:1], off
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 8
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0xff
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v4, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0xff, v0, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v0, v1, v4, v0
|
|
; GFX10-GISEL-NEXT: global_store_dword v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v4i8:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v4i8:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0xff, v0, v1
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v0, v0, v4, v1
|
|
; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x i8>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <4 x i8> %a
|
|
store <4 x i8> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v8i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v8i8:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v8i8:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v7
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v5
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v8
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v6
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v9
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v8i8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v8i8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v7
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v5
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v8
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v6
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v9
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v8i8:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 8
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xff
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v8
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v10
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v9
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v4
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v8i8:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 8
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xff
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v11, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v4, v6
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v4, v5
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v0, v0, v8, v9
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v10, v11
|
|
; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v8i8:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v8i8:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 8
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0xff
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v9, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v7, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0xff, v0, v6
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0xff, v1, v4
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v0, v0, v8, v7
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v1, v1, v9, v5
|
|
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v8i8:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v8i8:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0xff, v0, v4
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0xff, v1, v7
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v0, v0, v5, v6
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v1, v1, v8, v9
|
|
; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <8 x i8>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <8 x i8> %a
|
|
store <8 x i8> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_v16i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v16i8:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v16i8:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6
|
|
; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v5, v9
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v6, v12
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v15
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v4, v10
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v5, v13
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v9, v6, v16
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v0, v8
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v1, v11
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v14
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v9, v17
|
|
; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v16i8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v16i8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6
|
|
; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v5, v9
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v6, v12
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v15
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v4, v10
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v5, v13
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v9, v6, v16
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v0, v8
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v1, v11
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v14
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v9, v17
|
|
; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v16i8:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 8
|
|
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xff
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6
|
|
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v6, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v12
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v14
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v16
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v18
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v13
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v15
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v17
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v1, v0
|
|
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v16i8:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 8
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xff
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6
|
|
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_b32_sdwa v19, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v4, v4, v0, v8
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v5, v5, v0, v9
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v6, v6, v0, v10
|
|
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v7, v0, v1
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v4, v4, v12, v13
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v5, v5, v14, v15
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v6, v6, v16, v17
|
|
; GFX9-GISEL-NEXT: v_or3_b32 v7, v0, v18, v19
|
|
; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v16i8:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v16i8:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6
|
|
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v12, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v13, v4, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v14, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v15, v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v16, v6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v17, v6, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v18, v7, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_b32_sdwa v1, v7, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v4, 0xff, v4, v8
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v5, 0xff, v5, v9
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v6, 0xff, v6, v10
|
|
; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0xff, v7, v0
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v4, v4, v12, v13
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v5, v5, v14, v15
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v6, v6, v16, v17
|
|
; GFX10-GISEL-NEXT: v_or3_b32 v7, v0, v18, v1
|
|
; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_v16i8:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v16i8:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7
|
|
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0xff, v4, v0
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v5, 0xff, v5, v9
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v6, 0xff, v6, v12
|
|
; GFX11-GISEL-NEXT: v_and_or_b32 v7, 0xff, v7, v15
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v4, v0, v1, v8
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v5, v5, v10, v11
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v6, v6, v13, v14
|
|
; GFX11-GISEL-NEXT: v_or3_b32 v7, v7, v16, v17
|
|
; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <16 x i8>, ptr addrspace(1) %ptra
|
|
%freeze = freeze <16 x i8> %a
|
|
store <16 x i8> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
define void @freeze_i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_i1:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_i1:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_i1:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_i1:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: freeze_i1:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX8-NEXT: flat_store_byte v[2:3], v0
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: freeze_i1:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX9-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: freeze_i1:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX10-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: freeze_i1:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX11-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load i1, ptr addrspace(1) %ptra
|
|
%freeze = freeze i1 %a
|
|
store i1 %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Loads a <2 x i1> from %ptra (addrspace 1), freezes it, and stores the frozen
; vector to %ptrb (addrspace 1). The check lines below pin the per-target
; instruction sequence produced for this load/freeze/store chain.
; NOTE(review) - there are no GFX8-SDAG or GFX9-SDAG check lines for this
; function. The gfx900 -global-isel=0 RUN line passes GFX9,GFX8-SDAG as its
; check prefixes, so GFX8-SDAG is presumably shared by gfx803 and gfx900 with
; differing output and dropped by the update script. Confirm and regenerate.
define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v2i1:
; GFX6-SDAG:       ; %bb.0:
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX6-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX6-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX6-SDAG-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX6-SDAG-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX6-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v2i1:
; GFX6-GISEL:       ; %bb.0:
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX6-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX6-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX6-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX6-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX6-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v2i1:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX7-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX7-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX7-SDAG-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v2i1:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX7-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX7-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX7-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX7-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX7-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v2i1:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX8-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX8-GISEL-NEXT:    flat_store_byte v[2:3], v0
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v2i1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX9-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX9-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX9-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX9-GISEL-NEXT:    global_store_byte v[2:3], v0, off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v2i1:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX10-SDAG-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v2i1:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX10-GISEL-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX10-GISEL-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: freeze_v2i1:
; GFX11-SDAG-TRUE16:       ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT:    global_load_d16_u8 v0, v[0:1], off
; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-TRUE16-NEXT:    v_and_b16 v0.l, v0.l, 3
; GFX11-SDAG-TRUE16-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: freeze_v2i1:
; GFX11-SDAG-FAKE16:       ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX11-SDAG-FAKE16-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v2i1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX11-GISEL-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX11-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX11-GISEL-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %a = load <2 x i1>, ptr addrspace(1) %ptra
  %freeze = freeze <2 x i1> %a
  store <2 x i1> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Loads a <3 x i1> from %ptra (addrspace 1), freezes it, and stores the frozen
; vector to %ptrb (addrspace 1). The check lines below pin the per-target
; instruction sequence produced for this load/freeze/store chain.
; NOTE(review) - there are no GFX8-SDAG or GFX9-SDAG check lines for this
; function. The gfx900 -global-isel=0 RUN line passes GFX9,GFX8-SDAG as its
; check prefixes, so GFX8-SDAG is presumably shared by gfx803 and gfx900 with
; differing output and dropped by the update script. Confirm and regenerate.
define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v3i1:
; GFX6-SDAG:       ; %bb.0:
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX6-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX6-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX6-SDAG-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX6-SDAG-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX6-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v3i1:
; GFX6-GISEL:       ; %bb.0:
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX6-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX6-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
; GFX6-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX6-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX6-GISEL-NEXT:    v_and_b32_e32 v4, 1, v4
; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX6-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 2, v4
; GFX6-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-GISEL-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX6-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v3i1:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX7-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX7-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX7-SDAG-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX7-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v3i1:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX7-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX7-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
; GFX7-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX7-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX7-GISEL-NEXT:    v_and_b32_e32 v4, 1, v4
; GFX7-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX7-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 2, v4
; GFX7-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX7-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v3i1:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v4, 1, v4
; GFX8-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 2, v4
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX8-GISEL-NEXT:    flat_store_byte v[2:3], v0
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v3i1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX9-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
; GFX9-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX9-GISEL-NEXT:    v_and_b32_e32 v4, 1, v4
; GFX9-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX9-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX9-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 2, v4
; GFX9-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX9-GISEL-NEXT:    global_store_byte v[2:3], v0, off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v3i1:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX10-SDAG-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v3i1:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX10-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v4, 1, v4
; GFX10-GISEL-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    v_lshlrev_b16 v1, 2, v4
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX10-GISEL-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: freeze_v3i1:
; GFX11-SDAG-TRUE16:       ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT:    global_load_d16_u8 v0, v[0:1], off
; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-TRUE16-NEXT:    v_and_b16 v0.l, v0.l, 7
; GFX11-SDAG-TRUE16-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: freeze_v3i1:
; GFX11-SDAG-FAKE16:       ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX11-SDAG-FAKE16-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v3i1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX11-GISEL-NEXT:    v_and_b32_e32 v4, 1, v4
; GFX11-GISEL-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX11-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-GISEL-NEXT:    v_lshlrev_b16 v1, 2, v4
; GFX11-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX11-GISEL-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %a = load <3 x i1>, ptr addrspace(1) %ptra
  %freeze = freeze <3 x i1> %a
  store <3 x i1> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Loads an i32 from %ptra (addrspace 1), compares it for equality with zero,
; freezes the resulting i1, and stores it to %ptrb (addrspace 1). In the
; checked output the compare result is produced in vcc / vcc_lo and turned
; into a 0/1 byte with v_cndmask before the store.
define void @freeze_i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_i1_vcc:
; GFX6-SDAG:       ; %bb.0:
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX6-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX6-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX6-SDAG-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX6-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX6-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_i1_vcc:
; GFX6-GISEL:       ; %bb.0:
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX6-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX6-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_i1_vcc:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX7-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX7-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX7-SDAG-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_i1_vcc:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX7-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: freeze_i1_vcc:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8-NEXT:    flat_store_byte v[2:3], v0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: freeze_i1_vcc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT:    global_store_byte v[2:3], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: freeze_i1_vcc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: freeze_i1_vcc:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %a = load i32, ptr addrspace(1) %ptra
  %cmp = icmp eq i32 %a, 0
  %freeze = freeze i1 %cmp
  store i1 %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
; Loads a <2 x i32> from %ptra (addrspace 1), compares each element for
; equality with zero, freezes the resulting <2 x i1>, and stores it to %ptrb
; (addrspace 1). In the checked output each compare result is produced in
; vcc / vcc_lo, turned into 0/1 with v_cndmask, and the two bits are packed
; into one byte before the store.
; NOTE(review) - there are no GFX8-SDAG or GFX9-SDAG check lines for this
; function. The gfx900 -global-isel=0 RUN line passes GFX9,GFX8-SDAG as its
; check prefixes, so GFX8-SDAG is presumably shared by gfx803 and gfx900 with
; differing output and dropped by the update script. Confirm and regenerate.
define void @freeze_v2i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-SDAG-LABEL: freeze_v2i1_vcc:
; GFX6-SDAG:       ; %bb.0:
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX6-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX6-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX6-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX6-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX6-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX6-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX6-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX6-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-SDAG-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX6-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: freeze_v2i1_vcc:
; GFX6-GISEL:       ; %bb.0:
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX6-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-GISEL-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX6-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX6-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GFX6-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX6-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX6-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX6-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX6-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX6-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-SDAG-LABEL: freeze_v2i1_vcc:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    s_mov_b32 s6, 0
; GFX7-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-SDAG-NEXT:    s_mov_b32 s4, s6
; GFX7-SDAG-NEXT:    s_mov_b32 s5, s6
; GFX7-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-SDAG-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-SDAG-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: freeze_v2i1_vcc:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    s_mov_b32 s6, 0
; GFX7-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-GISEL-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX7-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX7-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; GFX7-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-GISEL-NEXT:    buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v2i1_vcc:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GFX8-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX8-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX8-GISEL-NEXT:    flat_store_byte v[2:3], v0
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v2i1_vcc:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX9-GISEL-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX9-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX9-GISEL-NEXT:    global_store_byte v[2:3], v0, off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: freeze_v2i1_vcc:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX10-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-SDAG-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX10-SDAG-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: freeze_v2i1_vcc:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-GISEL-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX10-GISEL-NEXT:    global_store_byte v[2:3], v0, off
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: freeze_v2i1_vcc:
; GFX11-SDAG-TRUE16:       ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT:    global_load_b64 v[4:5], v[0:1], off
; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-SDAG-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v4
; GFX11-SDAG-TRUE16-NEXT:    v_lshlrev_b16 v0.l, 1, v0.l
; GFX11-SDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-SDAG-TRUE16-NEXT:    v_or_b16 v0.l, v1.l, v0.l
; GFX11-SDAG-TRUE16-NEXT:    v_and_b16 v0.l, v0.l, 3
; GFX11-SDAG-TRUE16-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: freeze_v2i1_vcc:
; GFX11-SDAG-FAKE16:       ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SDAG-FAKE16-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX11-SDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX11-SDAG-FAKE16-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: freeze_v2i1_vcc:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-GISEL-NEXT:    v_lshlrev_b16 v1, 1, v1
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX11-GISEL-NEXT:    global_store_b8 v[2:3], v0, off
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %a = load <2 x i32>, ptr addrspace(1) %ptra
  %cmp = icmp eq <2 x i32> %a, zeroinitializer
  %freeze = freeze <2 x i1> %cmp
  store <2 x i1> %freeze, ptr addrspace(1) %ptrb
  ret void
}
|
|
|
|
define void @freeze_v3i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v3i1_vcc:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
|
; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4
|
|
; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v3i1_vcc:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v3i1_vcc:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
|
; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4
|
|
; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v3i1_vcc:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v3i1_vcc:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx3 v[4:6], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v3i1_vcc:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
|
|
; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4
|
|
; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v3i1_vcc:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 2, v4
|
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v3i1_vcc:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 2, v4
|
|
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_v3i1_vcc:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_b96 v[4:6], v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 2, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 7
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_v3i1_vcc:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_b96 v[4:6], v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 2, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v3i1_vcc:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b96 v[4:6], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 2, v4
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
|
|
; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <3 x i32>, ptr addrspace(1) %ptra
|
|
%cmp = icmp eq <3 x i32> %a, zeroinitializer
|
|
%freeze = freeze <3 x i1> %cmp
|
|
store <3 x i1> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of a <4 x i1> compare mask: loads <4 x i32> from %ptra, compares each
; lane for equality with zero, freezes the resulting mask and stores it to
; %ptrb. The check lines below are produced by update_llc_test_checks.py (see
; the NOTE at the top of the file); regenerate them rather than editing them
; by hand.
define void @freeze_v4i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
|
|
; GFX6-SDAG-LABEL: freeze_v4i1_vcc:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
|
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4
|
|
; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5
|
|
; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_v4i1_vcc:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5
|
|
; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_v4i1_vcc:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
|
|
; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
|
|
; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4
|
|
; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5
|
|
; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_v4i1_vcc:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
|
|
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
|
|
; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5
|
|
; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_v4i1_vcc:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5
|
|
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_v4i1_vcc:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4
|
|
; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5
|
|
; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_v4i1_vcc:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v4, 2, v4
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 3, v1
|
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_v4i1_vcc:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
|
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b16 v4, 2, v4
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v5
|
|
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 3, v1
|
|
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: freeze_v4i1_vcc:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 2, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 3, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 15
|
|
; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: freeze_v4i1_vcc:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v4, 2, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 3, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_v4i1_vcc:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
|
|
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v4, 2, v4
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v5
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 3, v1
|
|
; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
|
|
; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%a = load <4 x i32>, ptr addrspace(1) %ptra
|
|
%cmp = icmp eq <4 x i32> %a, zeroinitializer
|
|
%freeze = freeze <4 x i1> %cmp
|
|
store <4 x i1> %freeze, ptr addrspace(1) %ptrb
|
|
ret void
|
|
}
|
|
|
|
; Freeze of an f64 fabs whose input is only partially defined: %a is inserted
; into element 1 of a <2 x float> whose element 0 stays poison, and the vector
; is bitcast to double before the fabs. The frozen value %fr has two uses
; (%add1 and %add2), and the two sums are added together for the return value.
; The check lines below are produced by update_llc_test_checks.py (see the NOTE
; at the top of the file); regenerate them rather than editing them by hand.
define double @freeze_fabs_double(float %a, double %b, double %c) {
|
|
; GFX6-SDAG-LABEL: freeze_fabs_double:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX6-SDAG-NEXT: v_add_f64 v[0:1], |v[4:5]|, v[1:2]
|
|
; GFX6-SDAG-NEXT: v_add_f64 v[2:3], |v[4:5]|, v[3:4]
|
|
; GFX6-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_fabs_double:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0x7fffffff, v0
|
|
; GFX6-GISEL-NEXT: v_add_f64 v[0:1], v[4:5], v[1:2]
|
|
; GFX6-GISEL-NEXT: v_add_f64 v[2:3], v[4:5], v[3:4]
|
|
; GFX6-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_fabs_double:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX7-SDAG-NEXT: v_add_f64 v[0:1], |v[4:5]|, v[1:2]
|
|
; GFX7-SDAG-NEXT: v_add_f64 v[2:3], |v[4:5]|, v[3:4]
|
|
; GFX7-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_fabs_double:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0x7fffffff, v0
|
|
; GFX7-GISEL-NEXT: v_add_f64 v[0:1], v[4:5], v[1:2]
|
|
; GFX7-GISEL-NEXT: v_add_f64 v[2:3], v[4:5], v[3:4]
|
|
; GFX7-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-SDAG-LABEL: freeze_fabs_double:
|
|
; GFX8-SDAG: ; %bb.0:
|
|
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX8-SDAG-NEXT: v_add_f64 v[0:1], |v[4:5]|, v[1:2]
|
|
; GFX8-SDAG-NEXT: v_add_f64 v[2:3], |v[4:5]|, v[3:4]
|
|
; GFX8-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_fabs_double:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 0x7fffffff, v0
|
|
; GFX8-GISEL-NEXT: v_add_f64 v[0:1], v[4:5], v[1:2]
|
|
; GFX8-GISEL-NEXT: v_add_f64 v[2:3], v[4:5], v[3:4]
|
|
; GFX8-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_fabs_double:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v5, 0x7fffffff, v0
|
|
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[4:5], v[1:2]
|
|
; GFX9-GISEL-NEXT: v_add_f64 v[2:3], v[4:5], v[3:4]
|
|
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_fabs_double:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX10-SDAG-NEXT: v_add_f64 v[0:1], |v[4:5]|, v[1:2]
|
|
; GFX10-SDAG-NEXT: v_add_f64 v[2:3], |v[4:5]|, v[3:4]
|
|
; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_fabs_double:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v5, 0x7fffffff, v0
|
|
; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[4:5], v[1:2]
|
|
; GFX10-GISEL-NEXT: v_add_f64 v[2:3], v[4:5], v[3:4]
|
|
; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_fabs_double:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX11-SDAG-NEXT: v_add_f64 v[0:1], |v[4:5]|, v[1:2]
|
|
; GFX11-SDAG-NEXT: v_add_f64 v[2:3], |v[4:5]|, v[3:4]
|
|
; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_fabs_double:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v5, 0x7fffffff, v0
|
|
; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[4:5], v[1:2]
|
|
; GFX11-GISEL-NEXT: v_add_f64 v[2:3], v[4:5], v[3:4]
|
|
; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%pv = insertelement <2 x float> poison, float %a, i32 1
|
|
%d = bitcast <2 x float> %pv to double
|
|
%r = call double @llvm.fabs.f64(double %d)
|
|
%fr = freeze double %r
|
|
%add1 = fadd double %fr, %b
|
|
%add2 = fadd double %fr, %c
|
|
%add = fadd double %add1, %add2
|
|
ret double %add
|
|
}
|
|
|
|
; fabs -> freeze -> fabs chain on <4 x float> %A; the second fabs is applied to
; the frozen result of the first. %B is not used by the body.
; In the recorded checks the SDAG runs emit one v_and_b32 with 0x7fffffff per
; lane, while the GISEL runs emit two per lane.
; The check lines below are produced by update_llc_test_checks.py (see the NOTE
; at the top of the file); regenerate them rather than editing them by hand.
define <4 x float> @freeze_fabs_v4float(<4 x float> %A, <4 x float> %B) {
|
|
; GFX6-SDAG-LABEL: freeze_fabs_v4float:
|
|
; GFX6-SDAG: ; %bb.0:
|
|
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX6-SDAG-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX6-GISEL-LABEL: freeze_fabs_v4float:
|
|
; GFX6-GISEL: ; %bb.0:
|
|
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX6-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-SDAG-LABEL: freeze_fabs_v4float:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: freeze_fabs_v4float:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX7-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-SDAG-LABEL: freeze_fabs_v4float:
|
|
; GFX8-SDAG: ; %bb.0:
|
|
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX8-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX8-SDAG-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX8-SDAG-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-GISEL-LABEL: freeze_fabs_v4float:
|
|
; GFX8-GISEL: ; %bb.0:
|
|
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX8-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: freeze_fabs_v4float:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: freeze_fabs_v4float:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: freeze_fabs_v4float:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-LABEL: freeze_fabs_v4float:
|
|
; GFX11-SDAG: ; %bb.0:
|
|
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX11-SDAG-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX11-SDAG-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-LABEL: freeze_fabs_v4float:
|
|
; GFX11-GISEL: ; %bb.0:
|
|
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v2, 0x7fffffff, v2
|
|
; GFX11-GISEL-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
|
|
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%A0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %A)
|
|
%F1 = freeze <4 x float> %A0
|
|
%A1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %F1)
|
|
ret <4 x float> %A1
|
|
}
|