llvm-project/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
RamNalamothu 18f9351223 [AMDGPU] Do not generate ELF symbols for the local branch target labels
The compiler was generating symbols in the final code object for local
branch target labels. This bloats the code object, slows down the loader,
and is only used to simplify disassembly.

Use '--symbolize-operands' with llvm-objdump to improve readability of the
branch target operands in disassembly.

Fixes: SWDEV-312223

Reviewed By: scott.linder

Differential Revision: https://reviews.llvm.org/D114273
2021-11-20 10:32:41 +05:30

302 lines
6.5 KiB
LLVM

; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; This particular case will actually be worse in terms of code size
; from sinking into both.
; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1
; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label
; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc{{[0-1]}}
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
; GCN: .LBB0_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
%shr = lshr i32 %arg1, 8
br i1 undef, label %bb0, label %bb1
bb0:
%val0 = and i32 %shr, 255
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
%val1 = and i32 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
%phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i32 %phi, i32 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1
; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label
; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_sbfe_i32:
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
%shr = ashr i32 %arg1, 8
br i1 undef, label %bb0, label %bb1
bb0:
%val0 = and i32 %shr, 255
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
%val1 = and i32 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
%phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i32 %phi, i32 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1
; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label
; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label
; OPT: ret:
; OPT: store
; OPT: ret
; For GFX8: since i16 is legal type, we cannot sink lshr into .LBBs.
; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc{{[0-1]}}
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
; GCN: .LBB2_3:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
%shr = lshr i16 %arg1, 4
br i1 undef, label %bb0, label %bb1
bb0:
%val0 = and i16 %shr, 255
store volatile i16 0, i16 addrspace(1)* undef
br label %ret
bb1:
%val1 = and i16 %shr, 127
store volatile i16 0, i16 addrspace(1)* undef
br label %ret
ret:
%phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i16 %phi, i16 addrspace(1)* %out
ret void
}
; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.
; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1
; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255
; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
; GCN: s_cbranch_scc{{[0-1]}} .LBB3_2
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
; GCN: .LBB3_3:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
%shr = lshr i64 %arg1, 30
br i1 undef, label %bb0, label %bb1
bb0:
%val0 = and i64 %shr, 255
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
%val1 = and i64 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i64 %phi, i64 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1
; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255
; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
; GCN: s_cbranch_scc{{[0-1]}} .LBB4_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
; GCN: .LBB4_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
%shr = lshr i64 %arg1, 15
br i1 undef, label %bb0, label %bb1
bb0:
%val0 = and i64 %shr, 255
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
%val1 = and i64 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i64 %phi, i64 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1
; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255
; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc{{[0-1]}} .LBB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
; GCN: .LBB5_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
%shr = lshr i64 %arg1, 35
br i1 undef, label %bb0, label %bb1
bb0:
%val0 = and i64 %shr, 255
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
%val1 = and i64 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i64 %phi, i64 addrspace(1)* %out
ret void
}
attributes #0 = { nounwind }