Use the default, which freely coalesces anything it can. This mostly shows improvements, with a handful of regressions. The main concern would be if introducing wider registers is more likely to push the register usage up to the next occupancy tier.
47 lines
2.2 KiB
LLVM
47 lines
2.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
|
|
|
|
; Tests for a bug in SelectionDAG::UpdateNodeOperands exposed by VectorLegalizer
|
|
; where divergence information is not updated.
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
; Reduced kernel @spam: %arg is a noalias global (addrspace(1)) pointer that is
; indexed as an array of double by the workitem id. Four doubles are stored
; relative to that element, at indices 2, 3, 6 and 7.
; The autogenerated assertions below expect exactly two buffer_store_dwordx4
; instructions, at byte offsets 16 and 48 (i.e. double indices 2 and 6).
define amdgpu_kernel void @spam(ptr addrspace(1) noalias %arg) {
|
|
; CHECK-LABEL: spam:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
|
|
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 3, v0
|
|
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
|
; CHECK-NEXT: s_mov_b32 s3, 0xf000
|
|
; CHECK-NEXT: s_mov_b32 s2, 0
|
|
; CHECK-NEXT: v_mov_b32_e32 v2, 0x7ff80000
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dwordx4 v[1:4], v[0:1], s[0:3], 0 addr64 offset:16
|
|
; CHECK-NEXT: s_waitcnt expcnt(0)
|
|
; CHECK-NEXT: v_mov_b32_e32 v3, v1
|
|
; CHECK-NEXT: v_mov_b32_e32 v4, v1
|
|
; CHECK-NEXT: buffer_store_dwordx4 v[1:4], v[0:1], s[0:3], 0 addr64 offset:48
|
|
; CHECK-NEXT: s_endpgm
|
|
; %tmp2 = address of element number workitem.id.x of %arg, viewed as double.
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
|
|
%tmp1 = zext i32 %tmp to i64
|
|
%tmp2 = getelementptr inbounds double, ptr addrspace(1) %arg, i64 %tmp1
|
|
; The loaded value only feeds lane 1 of %tmp6, and %tmp7 overwrites that lane
; with 0.0, so it never reaches any of the stores below.
%tmp3 = load double, ptr addrspace(1) %tmp2, align 8
|
|
; Scalar fadd with two constant operands; 0x7FF8000000000000 is the bit
; pattern of a double quiet NaN.
%tmp4 = fadd double 0x7FF8000000000000, 0.000000e+00
|
|
; Build the <2 x double> operand: lane 0 = %tmp4, lane 1 = %tmp3 then 0.0.
%tmp5 = insertelement <2 x double> poison, double %tmp4, i64 0
|
|
%tmp6 = insertelement <2 x double> %tmp5, double %tmp3, i64 1
|
|
%tmp7 = insertelement <2 x double> %tmp6, double 0.000000e+00, i64 1
|
|
; Two vector fadds: %tmp8 has only constant operands, %tmp9 uses %tmp7.
%tmp8 = fadd <2 x double> zeroinitializer, splat (double 0x7FF8000000000000)
|
|
%tmp9 = fadd <2 x double> %tmp7, zeroinitializer
|
|
; Element 2 <- lane 0 of %tmp8; element 3 <- poison.
%tmp10 = extractelement <2 x double> %tmp8, i64 0
|
|
%tmp11 = getelementptr inbounds double, ptr addrspace(1) %tmp2, i64 2
|
|
store double %tmp10, ptr addrspace(1) %tmp11, align 8
|
|
%tmp12 = getelementptr inbounds double, ptr addrspace(1) %tmp2, i64 3
|
|
store double poison, ptr addrspace(1) %tmp12, align 8
|
|
; Element 6 <- lane 0 of %tmp9; element 7 <- 0.0.
%tmp13 = extractelement <2 x double> %tmp9, i64 0
|
|
%tmp14 = getelementptr inbounds double, ptr addrspace(1) %tmp2, i64 6
|
|
store double %tmp13, ptr addrspace(1) %tmp14, align 8
|
|
%tmp15 = getelementptr inbounds double, ptr addrspace(1) %tmp2, i64 7
|
|
store double 0.000000e+00, ptr addrspace(1) %tmp15, align 8
|
|
ret void
|
|
}
|