
Upstream has been moving away from passing `-verify-machineinstrs` explicitly, because the machine verifier is already exercised by expensive-checks builds. This PR removes almost all uses of `-verify-machineinstrs` from the tests in `llvm/test/CodeGen/AMDGPU/*.ll`, keeping it only in tests that currently fail without it.
221 lines
9.1 KiB
LLVM
221 lines
9.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefix=GFX90A
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s --check-prefix=GFX90A
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s --check-prefix=GFX942
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s --check-prefix=GFX10
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s --check-prefix=GFX942-GISEL

;; Note: load.to.lds is a wrapper intrinsic around underlying operations.
;; This is a bare-bones test to ensure that it lowers to the correct instructions.
;; The gfx900 and gfx90a invocations above share a single check prefix.

;; Intrinsic under test, once per source address space used below:
;; .p1 takes an addrspace(1) source pointer, .p7 an addrspace(7) one.
;; Both take the addrspace(3) destination, then i32 size, offset and aux.
declare void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
declare void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)

;; addrspace(1) source, 4-byte transfer.
;; The call passes size 4, immediate offset 16 and aux 1. The checks expect a
;; dword global load-to-LDS carrying offset:16, with the aux value printed as
;; glc or sc0 depending on the target, and m0 written from s0 beforehand.
;; NOTE(review): %lptr is the only inreg argument, so s0 is presumably the LDS
;; pointer -- inferred from the checks, confirm against the calling convention.
define amdgpu_ps void @global_load_lds_dword_vaddr_saddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture inreg %lptr) {
; GFX90A-LABEL: global_load_lds_dword_vaddr_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 m0, s0
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX942-LABEL: global_load_lds_dword_vaddr_saddr:
; GFX942:       ; %bb.0: ; %main_body
; GFX942-NEXT:    s_mov_b32 m0, s0
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    global_load_lds_dword v[0:1], off offset:16 sc0
; GFX942-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_vaddr_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 m0, s0
; GFX10-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX942-GISEL-LABEL: global_load_lds_dword_vaddr_saddr:
; GFX942-GISEL:       ; %bb.0: ; %main_body
; GFX942-GISEL-NEXT:    s_mov_b32 m0, s0
; GFX942-GISEL-NEXT:    s_nop 0
; GFX942-GISEL-NEXT:    global_load_lds_dword v[0:1], off offset:16 sc0
; GFX942-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1)
  ret void
}

;; addrspace(7) source, 4-byte transfer.
;; %off is applied to %gptr with a byte-granular getelementptr before the call,
;; which passes size 4, immediate offset 16 and aux 1. The checks expect an add
;; of s4 into v0, m0 written from s5, and a dword buffer load using s[0:3] with
;; offen, offset:16, the lds modifier, and the aux value printed as glc or sc0
;; depending on the target.
;; NOTE(review): s[0:3]/s4 look like the resource and offset parts of the inreg
;; %gptr and s5 like %lptr -- inferred from the checks, confirm.
define amdgpu_ps void @buffer_load_lds_dword_vaddr_saddr(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) nocapture inreg %lptr) {
; GFX90A-LABEL: buffer_load_lds_dword_vaddr_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX90A-NEXT:    s_mov_b32 m0, s5
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX942-LABEL: buffer_load_lds_dword_vaddr_saddr:
; GFX942:       ; %bb.0: ; %main_body
; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX942-NEXT:    s_mov_b32 m0, s5
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:16 sc0 lds
; GFX942-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_lds_dword_vaddr_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s4, v0
; GFX10-NEXT:    s_mov_b32 m0, s5
; GFX10-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX942-GISEL-LABEL: buffer_load_lds_dword_vaddr_saddr:
; GFX942-GISEL:       ; %bb.0: ; %main_body
; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX942-GISEL-NEXT:    s_mov_b32 m0, s5
; GFX942-GISEL-NEXT:    s_nop 0
; GFX942-GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:16 sc0 lds
; GFX942-GISEL-NEXT:    s_endpgm
main_body:
  %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off
  call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1)
  ret void
}

;; addrspace(1) source, 2-byte transfer.
;; The call passes size 2, immediate offset 16 and aux 1. The checks expect a
;; ushort global load-to-LDS carrying offset:16, with the aux value printed as
;; glc or sc0 depending on the target, and m0 written from s0 beforehand.
;; NOTE(review): %lptr is the only inreg argument, so s0 is presumably the LDS
;; pointer -- inferred from the checks, confirm against the calling convention.
define amdgpu_ps void @global_load_lds_ushort_vaddr_saddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture inreg %lptr) {
; GFX90A-LABEL: global_load_lds_ushort_vaddr_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 m0, s0
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_ushort v[0:1], off offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX942-LABEL: global_load_lds_ushort_vaddr_saddr:
; GFX942:       ; %bb.0: ; %main_body
; GFX942-NEXT:    s_mov_b32 m0, s0
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    global_load_lds_ushort v[0:1], off offset:16 sc0
; GFX942-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_ushort_vaddr_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 m0, s0
; GFX10-NEXT:    global_load_ushort v[0:1], off offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX942-GISEL-LABEL: global_load_lds_ushort_vaddr_saddr:
; GFX942-GISEL:       ; %bb.0: ; %main_body
; GFX942-GISEL-NEXT:    s_mov_b32 m0, s0
; GFX942-GISEL-NEXT:    s_nop 0
; GFX942-GISEL-NEXT:    global_load_lds_ushort v[0:1], off offset:16 sc0
; GFX942-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 16, i32 1)
  ret void
}

;; addrspace(7) source, 2-byte transfer.
;; %off is applied to %gptr with a byte-granular getelementptr before the call,
;; which passes size 2, immediate offset 16 and aux 1. The checks expect an add
;; of s4 into v0, m0 written from s5, and a ushort buffer load using s[0:3]
;; with offen, offset:16, the lds modifier, and the aux value printed as glc or
;; sc0 depending on the target.
;; NOTE(review): s[0:3]/s4 look like the resource and offset parts of the inreg
;; %gptr and s5 like %lptr -- inferred from the checks, confirm.
define amdgpu_ps void @buffer_load_lds_ushort_vaddr_saddr(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) nocapture inreg %lptr) {
; GFX90A-LABEL: buffer_load_lds_ushort_vaddr_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX90A-NEXT:    s_mov_b32 m0, s5
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    buffer_load_ushort v0, s[0:3], 0 offen offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX942-LABEL: buffer_load_lds_ushort_vaddr_saddr:
; GFX942:       ; %bb.0: ; %main_body
; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX942-NEXT:    s_mov_b32 m0, s5
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    buffer_load_ushort v0, s[0:3], 0 offen offset:16 sc0 lds
; GFX942-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_lds_ushort_vaddr_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s4, v0
; GFX10-NEXT:    s_mov_b32 m0, s5
; GFX10-NEXT:    buffer_load_ushort v0, s[0:3], 0 offen offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX942-GISEL-LABEL: buffer_load_lds_ushort_vaddr_saddr:
; GFX942-GISEL:       ; %bb.0: ; %main_body
; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX942-GISEL-NEXT:    s_mov_b32 m0, s5
; GFX942-GISEL-NEXT:    s_nop 0
; GFX942-GISEL-NEXT:    buffer_load_ushort v0, s[0:3], 0 offen offset:16 sc0 lds
; GFX942-GISEL-NEXT:    s_endpgm
main_body:
  %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off
  call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 2, i32 16, i32 1)
  ret void
}

;; addrspace(1) source, 1-byte transfer.
;; The call passes size 1, immediate offset 16 and aux 1. The checks expect a
;; ubyte global load-to-LDS carrying offset:16, with the aux value printed as
;; glc or sc0 depending on the target, and m0 written from s0 beforehand.
;; NOTE(review): %lptr is the only inreg argument, so s0 is presumably the LDS
;; pointer -- inferred from the checks, confirm against the calling convention.
define amdgpu_ps void @global_load_lds_ubyte_vaddr_saddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture inreg %lptr) {
; GFX90A-LABEL: global_load_lds_ubyte_vaddr_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 m0, s0
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_ubyte v[0:1], off offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX942-LABEL: global_load_lds_ubyte_vaddr_saddr:
; GFX942:       ; %bb.0: ; %main_body
; GFX942-NEXT:    s_mov_b32 m0, s0
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    global_load_lds_ubyte v[0:1], off offset:16 sc0
; GFX942-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_ubyte_vaddr_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 m0, s0
; GFX10-NEXT:    global_load_ubyte v[0:1], off offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX942-GISEL-LABEL: global_load_lds_ubyte_vaddr_saddr:
; GFX942-GISEL:       ; %bb.0: ; %main_body
; GFX942-GISEL-NEXT:    s_mov_b32 m0, s0
; GFX942-GISEL-NEXT:    s_nop 0
; GFX942-GISEL-NEXT:    global_load_lds_ubyte v[0:1], off offset:16 sc0
; GFX942-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 16, i32 1)
  ret void
}

;; addrspace(7) source, 1-byte transfer.
;; %off is applied to %gptr with a byte-granular getelementptr before the call,
;; which passes size 1, immediate offset 16 and aux 1. The checks expect an add
;; of s4 into v0, m0 written from s5, and a ubyte buffer load using s[0:3] with
;; offen, offset:16, the lds modifier, and the aux value printed as glc or sc0
;; depending on the target.
;; NOTE(review): s[0:3]/s4 look like the resource and offset parts of the inreg
;; %gptr and s5 like %lptr -- inferred from the checks, confirm.
define amdgpu_ps void @buffer_load_lds_ubyte_vaddr_saddr(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) nocapture inreg %lptr) {
; GFX90A-LABEL: buffer_load_lds_ubyte_vaddr_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX90A-NEXT:    s_mov_b32 m0, s5
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    buffer_load_ubyte v0, s[0:3], 0 offen offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX942-LABEL: buffer_load_lds_ubyte_vaddr_saddr:
; GFX942:       ; %bb.0: ; %main_body
; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX942-NEXT:    s_mov_b32 m0, s5
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    buffer_load_ubyte v0, s[0:3], 0 offen offset:16 sc0 lds
; GFX942-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_lds_ubyte_vaddr_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s4, v0
; GFX10-NEXT:    s_mov_b32 m0, s5
; GFX10-NEXT:    buffer_load_ubyte v0, s[0:3], 0 offen offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX942-GISEL-LABEL: buffer_load_lds_ubyte_vaddr_saddr:
; GFX942-GISEL:       ; %bb.0: ; %main_body
; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s4, v0
; GFX942-GISEL-NEXT:    s_mov_b32 m0, s5
; GFX942-GISEL-NEXT:    s_nop 0
; GFX942-GISEL-NEXT:    buffer_load_ubyte v0, s[0:3], 0 offen offset:16 sc0 lds
; GFX942-GISEL-NEXT:    s_endpgm
main_body:
  %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off
  call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 1, i32 16, i32 1)
  ret void
}