
BreakFalseDeps picks the best register for undef operands if instructions have a false dependency. The problem is that if the instruction is close to the beginning of the function, ReachingDefAnalysis is overly optimistic about the unused registers, which results in collisions with registers just defined in the caller. This patch changes the selection of the undef register to reverse order, which reduces the probability of register collisions between caller and callee. It brings improvements in some of our internal benchmarks, with negligible effect on other benchmarks.
47 lines
1.7 KiB
LLVM
47 lines
1.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+sse2 | FileCheck %s
|
|
; PR1877
|
|
|
|
; Loaded once in @runcont's entry block; the loaded value is the loop's exit bound.
@NNTOT = weak global i32 0 ; <ptr> [#uses=1]
|
|
; Destination of the volatile store of the accumulated float sum in @runcont.
@G = weak global float 0.000000e+00 ; <ptr> [#uses=1]
|
|
|
|
; @runcont(ptr %source): converts successive i32 elements of %source to float,
; accumulates them, and writes the total to @G with a volatile store.
; The exit bound is the i32 loaded from @NNTOT. The loop is bottom-tested: the
; body executes once before the index is compared against that bound.
define void @runcont(ptr %source) nounwind {
|
|
; CHECK-LABEL: runcont:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; CHECK-NEXT: movl L_NNTOT$non_lazy_ptr, %ecx
|
|
; CHECK-NEXT: movl (%ecx), %ecx
|
|
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
|
; CHECK-NEXT: xorl %edx, %edx
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: LBB0_1: ## %bb
|
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
|
; NOTE(review): %xmm7 below is the extra register source of vcvtsi2ssl, distinct
; from the %xmm1 destination; presumably it is the undef operand register chosen
; by BreakFalseDeps -- confirm against the pass before relying on this.
; CHECK-NEXT: vcvtsi2ssl (%eax,%edx,4), %xmm7, %xmm1
|
|
; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm0
|
|
; CHECK-NEXT: incl %edx
|
|
; CHECK-NEXT: cmpl %edx, %ecx
|
|
; CHECK-NEXT: jne LBB0_1
|
|
; CHECK-NEXT: ## %bb.2: ## %bb13
|
|
; CHECK-NEXT: movl L_G$non_lazy_ptr, %eax
|
|
; CHECK-NEXT: vmovss %xmm0, (%eax)
|
|
; CHECK-NEXT: retl
|
|
; Entry: read the exit bound from @NNTOT once, before entering the loop.
entry:
|
|
%tmp10 = load i32, ptr @NNTOT, align 4 ; <i32> [#uses=1]
|
|
br label %bb
|
|
|
|
; Loop body: %neuron.0 is the element index (0 on entry), %thesum.0 is the
; running float sum (0.0 on entry). Each iteration loads %source[%neuron.0],
; converts it with sitofp, and adds it to the running sum.
bb: ; preds = %bb, %entry
|
|
%neuron.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
|
%thesum.0 = phi float [ 0.000000e+00, %entry ], [ %tmp6, %bb ] ; <float> [#uses=1]
|
|
%tmp2 = getelementptr i32, ptr %source, i32 %neuron.0 ; <ptr> [#uses=1]
|
|
%tmp3 = load i32, ptr %tmp2, align 4 ; <i32> [#uses=1]
|
|
%tmp34 = sitofp i32 %tmp3 to float ; <float> [#uses=1]
|
|
%tmp6 = fadd float %tmp34, %thesum.0 ; <float> [#uses=2]
|
|
%indvar.next = add i32 %neuron.0, 1 ; <i32> [#uses=2]
|
|
; Leave the loop once the incremented index equals the value loaded from @NNTOT.
%exitcond = icmp eq i32 %indvar.next, %tmp10 ; <i1> [#uses=1]
|
|
br i1 %exitcond, label %bb13, label %bb
|
|
|
|
; Exit: write the final sum to @G; the store is marked volatile.
bb13: ; preds = %bb
|
|
store volatile float %tmp6, ptr @G, align 4
|
|
ret void
|
|
}
|