Regenerate the checks after two recent commits appended extra text to the end of assembly lines. FileCheck matches each check as a substring of the output line, so the existing checks kept passing and the stale output went unnoticed.
- #179414 added "nv" to loads and stores on GFX1250.
- #185774 added "msbs" comments on setreg instructions.
195 lines
9.5 KiB
LLVM
195 lines
9.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -O3 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -O3 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s

; Cluster load intrinsics under test, one per result width (b32, b64, b128).
; Operand order, per the parameter names: pointer, %cpol, %mask. In the
; assertions of this file the %cpol immediate is printed as th:/scope:
; modifiers on the load, and %mask is copied into m0 ahead of the load.
declare i32 @llvm.amdgcn.cluster.load.b32.i32.p1(ptr addrspace(1), i32 %cpol, i32 %mask)
declare <2 x i32> @llvm.amdgcn.cluster.load.b64.v2i32.p1(ptr addrspace(1), i32 %cpol, i32 %mask)
declare <4 x i32> @llvm.amdgcn.cluster.load.b128.v4i32.p1(ptr addrspace(1), i32 %cpol, i32 %mask)
|
|
|
|
; 32-bit cluster load through a VGPR base address with a VGPR %mask.
; Expected: the mask is moved to an SGPR with v_readfirstlane_b32 and then
; copied into m0 before the load. The GEP of 4 x i64 appears as offset:32,
; and cpol immediate 1 is printed as th:TH_LOAD_NT.
define amdgpu_ps void @cluster_load_b32_vaddr(ptr addrspace(1) %addr, ptr addrspace(1) %use, i32 %mask) {
; GFX1250-LABEL: cluster_load_b32_vaddr:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1250-NEXT:    s_mov_b32 m0, s0
; GFX1250-NEXT:    cluster_load_b32 v0, v[0:1], off offset:32 th:TH_LOAD_NT
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b32 v[2:3], v0, off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call i32 @llvm.amdgcn.cluster.load.b32.i32.p1(ptr addrspace(1) %gep, i32 1, i32 %mask)
  store i32 %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 32-bit cluster load through a VGPR base address with a constant %mask (7).
; Expected: m0 is set directly from the inline constant, with no
; v_readfirstlane. cpol immediate 10 is printed as
; th:TH_LOAD_HT scope:SCOPE_SE.
define amdgpu_ps void @cluster_load_b32_vaddr_imm_mask(ptr addrspace(1) %addr, ptr addrspace(1) %use) {
; GFX1250-LABEL: cluster_load_b32_vaddr_imm_mask:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    s_mov_b32 m0, 7
; GFX1250-NEXT:    cluster_load_b32 v0, v[0:1], off offset:32 th:TH_LOAD_HT scope:SCOPE_SE
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b32 v[2:3], v0, off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call i32 @llvm.amdgcn.cluster.load.b32.i32.p1(ptr addrspace(1) %gep, i32 10, i32 7)
  store i32 %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 32-bit cluster load with the base address and %mask both passed inreg.
; Expected: the load uses the s[0:1] base with a zeroed VGPR (v2) as the
; offset operand, and m0 is copied straight from s2. cpol immediate 22 is
; printed as th:TH_LOAD_NT_HT scope:SCOPE_DEV.
define amdgpu_ps void @cluster_load_b32_saddr(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 inreg %mask) {
; GFX1250-LABEL: cluster_load_b32_saddr:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
; GFX1250-NEXT:    s_mov_b32 m0, s2
; GFX1250-NEXT:    cluster_load_b32 v2, v2, s[0:1] offset:32 th:TH_LOAD_NT_HT scope:SCOPE_DEV
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b32 v[0:1], v2, off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call i32 @llvm.amdgcn.cluster.load.b32.i32.p1(ptr addrspace(1) %gep, i32 22, i32 %mask)
  store i32 %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 32-bit cluster load from an inreg base indexed by a sign-extended VGPR %idx
; (GEP over i32 elements). Expected: %idx (v2) is used directly as the VGPR
; offset with the scale_offset modifier, so no separate address arithmetic is
; emitted. cpol immediate 27 is printed as th:TH_LOAD_BYPASS scope:SCOPE_SYS.
; NOTE(review): the function name says "monitor" but the body calls the plain
; cluster.load intrinsic; the name is kept because the LABEL assertion and any
; external references depend on it.
; NOTE(review): the call passes %mask with a call-site `inreg` attribute,
; unlike the other b32 calls in this file - confirm that is intended.
define amdgpu_ps void @cluster_load_monitor_b32_saddr_scale_offset(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 inreg %mask, i32 %idx) {
; GFX1250-LABEL: cluster_load_monitor_b32_saddr_scale_offset:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    s_mov_b32 m0, s2
; GFX1250-NEXT:    cluster_load_b32 v2, v2, s[0:1] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b32 v[0:1], v2, off
; GFX1250-NEXT:    s_endpgm
entry:
  %idxprom = sext i32 %idx to i64
  %gep = getelementptr i32, ptr addrspace(1) %addr, i64 %idxprom
  %val = call i32 @llvm.amdgcn.cluster.load.b32.i32.p1(ptr addrspace(1) %gep, i32 27, i32 inreg %mask)
  store i32 %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 64-bit cluster load through a VGPR base address with a VGPR %mask.
; Expected: the mask is moved to an SGPR with v_readfirstlane_b32 and then
; copied into m0 before the load. The GEP of 4 x i64 appears as offset:32,
; and cpol immediate 1 is printed as th:TH_LOAD_NT.
define amdgpu_ps void @cluster_load_b64_vaddr(ptr addrspace(1) %addr, ptr addrspace(1) %use, i32 %mask) {
; GFX1250-LABEL: cluster_load_b64_vaddr:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1250-NEXT:    s_mov_b32 m0, s0
; GFX1250-NEXT:    cluster_load_b64 v[0:1], v[0:1], off offset:32 th:TH_LOAD_NT
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b64 v[2:3], v[0:1], off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call <2 x i32> @llvm.amdgcn.cluster.load.b64.v2i32.p1(ptr addrspace(1) %gep, i32 1, i32 %mask)
  store <2 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 64-bit cluster load through a VGPR base address with a constant %mask.
; The mask value 65543 is expected to be materialized into m0 as the literal
; 0x10007, with no v_readfirstlane. cpol immediate 10 is printed as
; th:TH_LOAD_HT scope:SCOPE_SE.
define amdgpu_ps void @cluster_load_b64_vaddr_imm_mask(ptr addrspace(1) %addr, ptr addrspace(1) %use) {
; GFX1250-LABEL: cluster_load_b64_vaddr_imm_mask:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    s_mov_b32 m0, 0x10007
; GFX1250-NEXT:    cluster_load_b64 v[0:1], v[0:1], off offset:32 th:TH_LOAD_HT scope:SCOPE_SE
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b64 v[2:3], v[0:1], off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call <2 x i32> @llvm.amdgcn.cluster.load.b64.v2i32.p1(ptr addrspace(1) %gep, i32 10, i32 65543)
  store <2 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 64-bit cluster load with the base address and %mask both passed inreg.
; Expected: the load uses the s[0:1] base with a zeroed VGPR (v2) as the
; offset operand, and m0 is copied straight from s2. cpol immediate 22 is
; printed as th:TH_LOAD_NT_HT scope:SCOPE_DEV.
define amdgpu_ps void @cluster_load_b64_saddr(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 inreg %mask) {
; GFX1250-LABEL: cluster_load_b64_saddr:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
; GFX1250-NEXT:    s_mov_b32 m0, s2
; GFX1250-NEXT:    cluster_load_b64 v[2:3], v2, s[0:1] offset:32 th:TH_LOAD_NT_HT scope:SCOPE_DEV
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call <2 x i32> @llvm.amdgcn.cluster.load.b64.v2i32.p1(ptr addrspace(1) %gep, i32 22, i32 %mask)
  store <2 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 64-bit cluster load from an inreg base indexed by a sign-extended VGPR %idx
; (GEP over i64 elements). Expected: %idx (v2) is used directly as the VGPR
; offset with the scale_offset modifier, so no separate address arithmetic is
; emitted. cpol immediate 27 is printed as th:TH_LOAD_BYPASS scope:SCOPE_SYS.
; NOTE(review): the function name says "monitor" but the body calls the plain
; cluster.load intrinsic; the name is kept because the LABEL assertion and any
; external references depend on it.
; NOTE(review): the call passes %mask with a call-site `inreg` attribute,
; unlike the other b64 calls in this file - confirm that is intended.
define amdgpu_ps void @cluster_load_monitor_b64_saddr_scale_offset(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 inreg %mask, i32 %idx) {
; GFX1250-LABEL: cluster_load_monitor_b64_saddr_scale_offset:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    s_mov_b32 m0, s2
; GFX1250-NEXT:    cluster_load_b64 v[2:3], v2, s[0:1] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX1250-NEXT:    s_endpgm
entry:
  %idxprom = sext i32 %idx to i64
  %gep = getelementptr i64, ptr addrspace(1) %addr, i64 %idxprom
  %val = call <2 x i32> @llvm.amdgcn.cluster.load.b64.v2i32.p1(ptr addrspace(1) %gep, i32 27, i32 inreg %mask)
  store <2 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 128-bit cluster load through a VGPR base address with a VGPR %mask.
; Expected: the mask (v4) is moved to an SGPR with v_readfirstlane_b32 and
; copied into m0 before the load, and the result lands in v[4:7]. The GEP of
; 4 x i64 appears as offset:32, and cpol immediate 1 is printed as
; th:TH_LOAD_NT.
define amdgpu_ps void @cluster_load_b128_vaddr(ptr addrspace(1) %addr, ptr addrspace(1) %use, i32 %mask) {
; GFX1250-LABEL: cluster_load_b128_vaddr:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1250-NEXT:    s_mov_b32 m0, s0
; GFX1250-NEXT:    cluster_load_b128 v[4:7], v[0:1], off offset:32 th:TH_LOAD_NT
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b128 v[2:3], v[4:7], off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call <4 x i32> @llvm.amdgcn.cluster.load.b128.v4i32.p1(ptr addrspace(1) %gep, i32 1, i32 %mask)
  store <4 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 128-bit cluster load through a VGPR base address with a constant %mask (15).
; Expected: m0 is set directly from the inline constant, with no
; v_readfirstlane. cpol immediate 10 is printed as
; th:TH_LOAD_HT scope:SCOPE_SE.
define amdgpu_ps void @cluster_load_b128_vaddr_imm_mask(ptr addrspace(1) %addr, ptr addrspace(1) %use) {
; GFX1250-LABEL: cluster_load_b128_vaddr_imm_mask:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    s_mov_b32 m0, 15
; GFX1250-NEXT:    cluster_load_b128 v[4:7], v[0:1], off offset:32 th:TH_LOAD_HT scope:SCOPE_SE
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b128 v[2:3], v[4:7], off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call <4 x i32> @llvm.amdgcn.cluster.load.b128.v4i32.p1(ptr addrspace(1) %gep, i32 10, i32 15)
  store <4 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
|
|
; 128-bit cluster load with the base address and %mask both passed inreg.
; Expected: the load uses the s[0:1] base with a zeroed VGPR (v2) as the
; offset operand, and m0 is copied straight from s2. cpol immediate 27 is
; printed as th:TH_LOAD_BYPASS scope:SCOPE_SYS.
; NOTE(review): the call passes %mask with a call-site `inreg` attribute,
; unlike the other b128 calls in this file - confirm that is intended.
define amdgpu_ps void @cluster_load_b128_saddr(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 inreg %mask) {
; GFX1250-LABEL: cluster_load_b128_saddr:
; GFX1250:       ; %bb.0: ; %entry
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
; GFX1250-NEXT:    s_mov_b32 m0, s2
; GFX1250-NEXT:    cluster_load_b128 v[2:5], v2, s[0:1] offset:32 th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX1250-NEXT:    s_endpgm
entry:
  %gep = getelementptr i64, ptr addrspace(1) %addr, i32 4
  %val = call <4 x i32> @llvm.amdgcn.cluster.load.b128.v4i32.p1(ptr addrspace(1) %gep, i32 27, i32 inreg %mask)
  store <4 x i32> %val, ptr addrspace(1) %use
  ret void
}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GFX1250-GISEL: {{.*}}
|
|
; GFX1250-SDAG: {{.*}}
|