
This changes the RC priorities such that AVRegClass is the least prioritized. These registers are less constrained than the VRegClass and ARegClass as they can be either agpr or vgpr. Thus, assigning them last removes unnecessary constraints from VRegClass and ARegClass assignments, and allows the RA to make smarter decisions about whether to use vgpr / agpr for AVRegClass. We only have 5 bits for RC priorities, and we still want to prioritize larger RCs over smaller ones. Since this new prioritization uses the 5th bit for AVRegClass vs ARegClass / VRegClass, we only have 4 bits to encode the size priorities. Previously, each RC with a distinct size had a distinct priority. However, this PR groups together multiple sizes to the same priority. Currently, this will have no effect on prioritization in practice because we only have one actually defined RC per group per vector register type. For example, register classes with 15 or 16 32-bit registers will have the same size priority (14). However, we only have VReg_512 (VReg_480 doesn't exist), so only one actual RC in VRegClass has this priority. Similarly, we give register classes with 17-32+ 32-bit registers a size priority of 15, but we only have VReg_1024. The effect of this PR is to prioritize first the vector register type (VReg & AReg have top priority, then AVReg), with the size of the register class having second priority. Passes PSDB. --------- Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
29 lines
1.3 KiB
LLVM
29 lines
1.3 KiB
LLVM
; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -o - %s 2>%t.err | FileCheck -implicit-check-not=error %s
|
|
; RUN: FileCheck -check-prefix=ERR %s < %t.err
|
|
|
|
; This testcase would fail on an "illegal eviction". If the assert was
|
|
; relaxed to allow equivalent cascade numbers, it would loop infinitely.
|
|
|
|
; ERR: error: inline assembly requires more registers than available
|
|
|
|
; Aggregate result type of the "def" inline asm in @illegal_eviction_assert.
; It has one element per asm output operand, in operand order: vectors of
; 16, 8, 5 and 4 x i32, followed by a second 16 x i32 vector.
%asm.output = type { <16 x i32>, <8 x i32>, <5 x i32>, <4 x i32>, <16 x i32> }
|
|
|
|
; CHECK-LABEL: {{^}}illegal_eviction_assert:
|
|
; CHECK: ; def v[13:28] v[0:7] v[8:12] v[0:3] a[0:15]
|
|
; CHECK: ; clobber
|
|
; CHECK: ; use v[13:28] v[0:7] v[8:12] v[0:3] a[1:16]
|
|
; Register-allocation stress case built from three inline asm statements.
;   1. "def" produces five vector values: four under the generic "v"
;      constraint and one pinned to the physical range a[0:15].
;   2. "clobber" clobbers v[0:31] while all five values are still live.
;   3. "use" consumes the five values; the last one is pinned to a[1:16],
;      a different range from the one it was defined in.
; The %arg parameter is not referenced in the body.
; NOTE(review): presumably the overlapping a[0:15] -> a[1:16] ranges plus the
; v[0:31] clobber are what drive the allocator into the eviction problem
; described in the comment at the top of this file -- confirm against the
; original bug report.
define void @illegal_eviction_assert(ptr addrspace(1) %arg) #0 {
|
|
; Disabled alternative that would define a single i32 in a0; kept as-is.
;%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
|
|
; Define all five values at once; operand $4 is tied to a[0:15].
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,={a[0:15]}"()
|
|
; Split the aggregate into its elements: <16 x i32>, <8 x i32>, <5 x i32>,
; <4 x i32> (all "=v" outputs) ...
%vgpr0 = extractvalue %asm.output %asm, 0
|
|
%vgpr1 = extractvalue %asm.output %asm, 1
|
|
%vgpr2 = extractvalue %asm.output %asm, 2
|
|
%vgpr3 = extractvalue %asm.output %asm, 3
|
|
; ... and the <16 x i32> that was produced in a[0:15].
%agpr0 = extractvalue %asm.output %asm, 4
|
|
; Clobber v0-v31 between the definitions and the uses.
call void asm sideeffect "; clobber", "~{v[0:31]}"()
|
|
; Consume every value; the former a[0:15] value must now be in a[1:16].
call void asm sideeffect "; use $0 $1 $2 $3 $4","v,v,v,v,{a[1:16]}"(<16 x i32> %vgpr0, <8 x i32> %vgpr1, <5 x i32> %vgpr2, <4 x i32> %vgpr3, <16 x i32> %agpr0)
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, attached to @illegal_eviction_assert.
; NOTE(review): "8,8" is presumably the min,max waves-per-EU pair, i.e. the
; occupancy is pinned to exactly 8, which limits the per-wave register
; budget -- confirm against the AMDGPU usage documentation.
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
|