
; update waitcnt pass to check hi16 and lo16 in true16 mode
; Co-authored-by: Jay Foad <jay.foad@gmail.com>
; (392 lines, 17 KiB, LLVM)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16

; Loads an i16 from an addrspace(5) alloca, adds 123, and keeps the sum live
; across an inline asm that clobbers v0-v7 before storing it back. The checks
; expect a 2-byte folded spill/reload with +real-true16 and a 4-byte one with
; -real-true16.
define void @spill_i16_alu() {
; GCN-TRUE16-LABEL: spill_i16_alu:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16_alu:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca i16, i32 1, align 4, addrspace(5)

  %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  %a = load volatile i16, ptr addrspace(5) %aptr
  %add = add i16 %a, 123

  ; Force %add to spill: it is computed before the asm, used after it, and the
  ; asm clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  store volatile i16 %add, ptr addrspace(5) %outptr

  ret void
}

; Like spill_i16_alu, but a second i16 (%b) is loaded and incremented after
; the clobbering asm, so the reloaded %add and the new %badd are live together.
; With +real-true16 the checks expect the reload to go to the high half of v0
; (scratch_load_d16_hi_b16) while %badd is computed in v0.l; with -real-true16
; the reload goes to v1.
define void @spill_i16_alu_two_vals() {
; GCN-TRUE16-LABEL: spill_i16_alu_two_vals:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16_alu_two_vals:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_b32 v1, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v1, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca i16, i32 1, align 4, addrspace(5)
  %alloca2 = alloca i16, i32 1, align 4, addrspace(5)

  %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  %a = load volatile i16, ptr addrspace(5) %aptr
  %add = add i16 %a, 123

  ; Force %add to spill: it is computed before the asm, used after it, and the
  ; asm clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %bptr = getelementptr i16, ptr addrspace(5) %alloca2, i32 0
  %b = load volatile i16, ptr addrspace(5) %bptr
  %badd = add i16 %b, 123
  %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  store volatile i16 %add, ptr addrspace(5) %outptr
  %outptr2 = getelementptr i16, ptr addrspace(5) %alloca2, i32 0
  store volatile i16 %badd, ptr addrspace(5) %outptr2

  ret void
}

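; Tests after this point do not actually test 16-bit spills because there is no
; use of VGPR_16. They could demonstrate 16-bit spills if we updated the
; instructions to use VGPR_16 instead of VGPR_32.
; NOTE(review): the +real-true16 check lines in the tests below already show
; 2-byte folded spills, so the statement above may be stale; confirm and update.
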
; Loads an i16 from an addrspace(5) alloca and keeps it live, with no ALU use,
; across an inline asm that clobbers v0-v7 before storing it back. The checks
; expect a 2-byte folded spill/reload with +real-true16 and a 4-byte one with
; -real-true16.
define void @spill_i16() {
; GCN-TRUE16-LABEL: spill_i16:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca i16, i32 1, align 4, addrspace(5)

  %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  %a = load volatile i16, ptr addrspace(5) %aptr

  ; Force %a to spill: it is live across an asm that clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  store volatile i16 %a, ptr addrspace(5) %outptr

  ret void
}

; Same shape as spill_i16, but the value is a half: it is loaded from an
; addrspace(5) alloca, kept live across an inline asm that clobbers v0-v7, and
; stored back. The checks expect a 2-byte folded spill/reload with
; +real-true16 and a 4-byte one with -real-true16.
define void @spill_half() {
; GCN-TRUE16-LABEL: spill_half:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_half:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca half, i32 1, align 4, addrspace(5)

  %aptr = getelementptr half, ptr addrspace(5) %alloca, i32 0
  %a = load volatile half, ptr addrspace(5) %aptr

  ; Force %a to spill: it is live across an asm that clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr half, ptr addrspace(5) %alloca, i32 0
  store volatile half %a, ptr addrspace(5) %outptr

  ret void
}

; Loads the i16 at index 1 (byte offset 2 in the checks) of a <2 x i16>
; addrspace(5) alloca, keeps it live across an inline asm that clobbers v0-v7,
; and stores it back to the same slot. The checks expect a 2-byte folded
; spill/reload with +real-true16 and a 4-byte one with -real-true16.
define void @spill_i16_from_v2i16() {
; GCN-TRUE16-LABEL: spill_i16_from_v2i16:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_i16_from_v2i16:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)

  %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
  %a = load volatile i16, ptr addrspace(5) %aptr

  ; Force %a to spill: it is live across an asm that clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
  store volatile i16 %a, ptr addrspace(5) %outptr

  ret void
}

; Loads both i16 elements (indices 1 and 0) of a <2 x i16> addrspace(5)
; alloca, keeps both live across an inline asm that clobbers v0-v7, and stores
; them back. The checks expect two separate folded spills/reloads: 2 bytes
; each with +real-true16, 4 bytes each with -real-true16.
define void @spill_2xi16_from_v2i16() {
; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:12 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)

  %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
  %a = load volatile i16, ptr addrspace(5) %aptr
  %bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  %b = load volatile i16, ptr addrspace(5) %bptr

  ; Force %a and %b to spill: both are live across an asm that clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
  store volatile i16 %a, ptr addrspace(5) %outptr
  %boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  store volatile i16 %b, ptr addrspace(5) %boutptr

  ret void
}

; Variant of spill_2xi16_from_v2i16 in which the inline asm clobbers only
; v0-v6, leaving v7 untouched. With -real-true16 the checks keep %a in v7 and
; spill only %b (4 bytes); with +real-true16 the checks still spill both values
; (2 bytes each).
define void @spill_2xi16_from_v2i16_one_free_reg() {
; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
; GCN-TRUE16: ; %bb.0: ; %entry
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
; GCN-TRUE16-NEXT: ;;#ASMSTART
; GCN-TRUE16-NEXT: ;;#ASMEND
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
; GCN-FAKE16: ; %bb.0: ; %entry
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-FAKE16-NEXT: ;;#ASMSTART
; GCN-FAKE16-NEXT: ;;#ASMEND
; GCN-FAKE16-NEXT: scratch_store_b16 off, v7, s32 offset:2 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)

  %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
  %a = load volatile i16, ptr addrspace(5) %aptr
  %bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  %b = load volatile i16, ptr addrspace(5) %bptr

  ; Clobber v0-v6 only, so two live i16 values compete for the one VGPR (v7)
  ; the asm leaves alone and at least one of them has to spill.
  ; Would not need to spill if the short scratch instructions used vgpr_16.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6}" ()

  %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1
  store volatile i16 %a, ptr addrspace(5) %outptr
  %boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0
  store volatile i16 %b, ptr addrspace(5) %boutptr

  ret void
}

; Loads a whole <2 x i16> (element index 1 of the alloca, byte offset 4 in the
; checks), keeps it live across an inline asm that clobbers v0-v7, and stores
; it back. Both RUN configurations share one set of checks, which expect a
; 4-byte folded spill/reload.
define void @spill_v2i16() {
; GCN-LABEL: spill_v2i16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)

  %aptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <2 x i16>, ptr addrspace(5) %aptr

  ; Force %a to spill: it is live across an asm that clobbers v0-v7.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1
  store volatile <2 x i16> %a, ptr addrspace(5) %outptr

  ret void
}