Brox Chen 222b99d3aa
[AMDGPU][True16][CodeGen] update waitcnt for true16 (#128927)
update waitcnt pass to check hi16 and lo16 in true16 mode

---------

Co-authored-by: Jay Foad <jay.foad@gmail.com>
2025-03-11 10:59:51 -04:00

392 lines
17 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16
; spill_i16_alu: a volatile i16 load from an addrspace(5) alloca feeds an add
; of 123; the sum is live across an inline asm that clobbers v0-v7 and is then
; written back with a volatile store.
; Per the assertions below, the true16 run spills the sum with a 2-byte
; scratch_store_b16 and reloads it with scratch_load_d16_b16, while the fake16
; run uses a 4-byte scratch_store_b32 / scratch_load_b32 pair.
define void @spill_i16_alu() {
; GCN-TRUE16-LABEL: spill_i16_alu:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16_alu:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca i16, i32 1, align 4, addrspace(5)
%aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
%a = load volatile i16, ptr addrspace(5) %aptr
%add = add i16 %a, 123
; Clobber v0-v7 here. %add (not %a) is the value live across the asm, so it is
; the one that gets spilled and reloaded.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
%outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
store volatile i16 %add, ptr addrspace(5) %outptr
ret void
}
; spill_i16_alu_two_vals: like a single-value i16 ALU spill, but a second i16
; is loaded from a second alloca and incremented after the inline asm, so two
; 16-bit values are live when the spilled sum is reloaded.
; Per the true16 assertions below, the reload lands in the high half of v0
; (scratch_load_d16_hi_b16) while the second value occupies v0.l; the add on
; v0.l is issued without waiting for that reload, and the s_waitcnt vmcnt(0)
; appears only before the store that reads the high half.
; The fake16 assertions use a separate 32-bit register (v1) for the reload.
define void @spill_i16_alu_two_vals() {
; GCN-TRUE16-LABEL: spill_i16_alu_two_vals:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16_alu_two_vals:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_b32 v1, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v1, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca i16, i32 1, align 4, addrspace(5)
%alloca2 = alloca i16, i32 1, align 4, addrspace(5)
%aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
%a = load volatile i16, ptr addrspace(5) %aptr
%add = add i16 %a, 123
; Clobber v0-v7 here. %add (not %a) is the value live across the asm, so it is
; the one that gets spilled and reloaded.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
; %b is loaded and incremented only after the asm, so it is never spilled.
%bptr = getelementptr i16, ptr addrspace(5) %alloca2, i32 0
%b = load volatile i16, ptr addrspace(5) %bptr
%badd = add i16 %b, 123
%outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
store volatile i16 %add, ptr addrspace(5) %outptr
%outptr2 = getelementptr i16, ptr addrspace(5) %alloca2, i32 0
store volatile i16 %badd, ptr addrspace(5) %outptr2
ret void
}
; Tests after this point do not actually test 16-bit spills because there is no use of VGPR_16. They could demonstrate 16-bit spills if the instructions were updated to use VGPR_16 instead of VGPR_32.
; spill_i16: a volatile i16 load from an addrspace(5) alloca is kept live
; across an inline asm that clobbers v0-v7, then stored back unchanged with a
; volatile store (no ALU instruction touches the value).
; Per the assertions below, the true16 run spills 2 bytes (scratch_store_b16 /
; scratch_load_d16_b16) and the fake16 run spills 4 bytes (scratch_store_b32 /
; scratch_load_b32).
define void @spill_i16() {
; GCN-TRUE16-LABEL: spill_i16:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca i16, i32 1, align 4, addrspace(5)
%aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
%a = load volatile i16, ptr addrspace(5) %aptr
; Force %a to spill: the asm clobbers v0-v7 while %a is still live.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
%outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
store volatile i16 %a, ptr addrspace(5) %outptr
ret void
}
; spill_half: a volatile half load from an addrspace(5) alloca is kept live
; across an inline asm that clobbers v0-v7, then stored back unchanged with a
; volatile store.
; Per the assertions below, the true16 run spills 2 bytes (scratch_store_b16 /
; scratch_load_d16_b16) and the fake16 run spills 4 bytes (scratch_store_b32 /
; scratch_load_b32).
define void @spill_half() {
; GCN-TRUE16-LABEL: spill_half:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_half:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca half, i32 1, align 4, addrspace(5)
%aptr = getelementptr half, ptr addrspace(5) %alloca, i32 0
%a = load volatile half, ptr addrspace(5) %aptr
; Force %a to spill: the asm clobbers v0-v7 while %a is still live.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
%outptr = getelementptr half, ptr addrspace(5) %alloca, i32 0
store volatile half %a, ptr addrspace(5) %outptr
ret void
}
; spill_i16_from_v2i16: the alloca is typed <2 x i16>, but only a single i16
; (element 1, i.e. the offset:2 access in the assertions below) is loaded,
; kept live across an inline asm that clobbers v0-v7, and stored back.
; Per the assertions below, the true16 run spills 2 bytes and the fake16 run
; spills 4 bytes, both at offset 8.
define void @spill_i16_from_v2i16() {
; GCN-TRUE16-LABEL: spill_i16_from_v2i16:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16_from_v2i16:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
; Address the alloca as an i16 array and pick element 1.
%aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
%a = load volatile i16, ptr addrspace(5) %aptr
; Force %a to spill: the asm clobbers v0-v7 while %a is still live.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
%outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
store volatile i16 %a, ptr addrspace(5) %outptr
ret void
}
; spill_2xi16_from_v2i16: two separate i16 values (elements 1 and 0 of a
; <2 x i16>-typed alloca) are loaded, both kept live across an inline asm that
; clobbers v0-v7, and both stored back.
; Per the assertions below, each value gets its own spill slot: two 2-byte
; slots (offsets 8 and 10) in the true16 run and two 4-byte slots (offsets 8
; and 12) in the fake16 run.
define void @spill_2xi16_from_v2i16() {
; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:12 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
%aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
%a = load volatile i16, ptr addrspace(5) %aptr
%bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
%b = load volatile i16, ptr addrspace(5) %bptr
; Force both %a and %b to spill: the asm clobbers v0-v7 while both are live.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
%outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
store volatile i16 %a, ptr addrspace(5) %outptr
%boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
store volatile i16 %b, ptr addrspace(5) %boutptr
ret void
}
; spill_2xi16_from_v2i16_one_free_reg: two i16 values are live across an
; inline asm that clobbers only v0-v6, so v7 is not clobbered by the asm.
; Per the fake16 assertions below, %a is kept in v7 across the asm and only %b
; is spilled (one 4-byte slot). Per the true16 assertions, both values are
; still spilled to separate 2-byte slots and v7 is not used.
define void @spill_2xi16_from_v2i16_one_free_reg() {
; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_store_b16 off, v7, s32 offset:2 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
%aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
%a = load volatile i16, ptr addrspace(5) %aptr
%bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
%b = load volatile i16, ptr addrspace(5) %bptr
; Clobber v0-v6 only (v7 is left out of the clobber list), with %a and %b both
; live across the asm, so at most one of them can stay in a register.
; Would not need to spill if the short scratch instructions used vgpr_16
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6}" ()
%outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
store volatile i16 %a, ptr addrspace(5) %outptr
%boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
store volatile i16 %b, ptr addrspace(5) %boutptr
ret void
}
; spill_v2i16: a whole <2 x i16> value (element 1 of the alloca, i.e. the
; offset:4 access in the assertions below) is loaded, kept live across an
; inline asm that clobbers v0-v7, and stored back.
; The assertions are shared by both RUN lines: the value is spilled and
; reloaded as a single 4-byte slot with scratch_store_b32 / scratch_load_b32.
define void @spill_v2i16() {
; GCN-LABEL: spill_v2i16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
%aptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1
%a = load volatile <2 x i16>, ptr addrspace(5) %aptr
; Force %a to spill: the asm clobbers v0-v7 while %a is still live.
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
%outptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1
store volatile <2 x i16> %a, ptr addrspace(5) %outptr
ret void
}