
VirtRegRewriter may sometimes fail to correctly apply the kill flag where necessary, which causes unnecessary code generation on PowerPC. This patch fixes the way the masks for defined lanes and the mask for used lanes are computed. Contact albion.fung@ibm.com instead of the author for problems related to this commit. Differential Revision: https://reviews.llvm.org/D92405
33 lines
949 B
YAML
33 lines
949 B
YAML
# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
|
|
# Embedded LLVM IR module: an amdgpu_kernel named func0 whose only instruction
# is `ret void`. The machine function with the same name is given in the YAML
# document that follows this one.
--- |
|
|
|
|
define amdgpu_kernel void @func0() {
|
|
ret void
|
|
}
|
|
|
|
...
|
|
|
|
---
|
|
# We should not detect any interference between v0/v1 here and only allocate
|
|
# sgpr0-sgpr3.
|
|
#
# The body below defines two sgpr_128 virtual registers through subregister
# writes: %0 gets sub0 and sub3, %1 gets sub1 and sub2. The two final S_NOP
# instructions read exactly those subregisters back.
# NOTE(review): "v0/v1" above presumably refers to the virtual registers
# %0/%1 of this function rather than to physical registers -- confirm.
# The expected output names sgpr0, sgpr3, sgpr1 and sgpr2 for the four
# definitions (in that order), and a killed flag is written only on the first
# operand of each of the two final S_NOP instructions.
#
|
|
# CHECK-LABEL: func0
|
|
# CHECK: S_NOP 0, implicit-def renamable $sgpr0
|
|
# CHECK: S_NOP 0, implicit-def renamable $sgpr3
|
|
# CHECK: S_NOP 0, implicit-def renamable $sgpr1
|
|
# CHECK: S_NOP 0, implicit-def renamable $sgpr2
|
|
# CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit renamable $sgpr3
|
|
# CHECK: S_NOP 0, implicit killed renamable $sgpr1, implicit renamable $sgpr2
|
|
name: func0
|
|
body: |
|
|
bb.0:
|
|
S_NOP 0, implicit-def undef %0.sub0 : sgpr_128
|
|
S_NOP 0, implicit-def %0.sub3
|
|
S_NOP 0, implicit-def undef %1.sub1 : sgpr_128
|
|
S_NOP 0, implicit-def %1.sub2
|
|
|
|
|
|
S_NOP 0, implicit %0.sub0, implicit %0.sub3
|
|
S_NOP 0, implicit %1.sub1, implicit %1.sub2
|
|
...
|