
The first attempt missed changing test files for tools (update_llc_test_checks.py). Original commit message: This implements the main suggested change from issue #56498. Using the shorter (non-extending) instruction with only -Oz ("minsize") rather than -Os ("optsize") is left as a possible follow-up. As noted in the bug report, the zero-extending load may have shorter latency/better throughput across a wide range of x86 micro-arches, and it avoids a potential false dependency. The cost is an extra instruction byte. This could cause perf ups and downs from secondary effects, but I don't think it is possible to account for those in advance, and that will likely also depend on exact micro-arch. This does bring LLVM x86 codegen more in line with existing gcc codegen, so if problems are exposed they are more likely to occur for both compilers. Differential Revision: https://reviews.llvm.org/D129775
43 lines
1.2 KiB
LLVM
43 lines
1.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
|
|
|
|
; Global i32 (initially 0); @fn4 loads it and uses the value as a shift amount.
@d = global i32 0, align 4
|
|
|
|
; Verify that the sar (which reads its shift count from %cl) is emitted before
|
|
; %ecx is clobbered with the first argument being passed to fn3.
|
|
|
|
; @fn4(i32 %i): loads the i32 global @d, arithmetic-shifts %i right by that
; value, passes the constants 2 and 5 plus the shifted value and %i to @fn3
; through a fastcc call marked `tail`, and returns 1 when the shifted value is
; less than 1 (signed compare), otherwise 0.
; The expected i386 output below keeps the shift result in %esi: the sarl by
; %cl appears before %ecx is reloaded with $2 for the call, and %esi is tested
; again after the call returns.
define i32 @fn4(i32 %i) #0 {
|
|
; CHECK-LABEL: fn4:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: pushl %esi
|
|
; CHECK-NEXT: subl $8, %esp
|
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; The shift count is loaded from d with a zero-extending byte load into %ecx;
; the sarl that follows reads it from %cl.
; CHECK-NEXT: movzbl d, %ecx
|
|
; CHECK-NEXT: movl %eax, %esi
|
|
; CHECK-NEXT: sarl %cl, %esi
|
|
; CHECK-NEXT: subl $8, %esp
|
|
; %ecx is overwritten here, after the shift above has already consumed %cl.
; CHECK-NEXT: movl $2, %ecx
|
|
; CHECK-NEXT: movl $5, %edx
|
|
; CHECK-NEXT: pushl %eax
|
|
; CHECK-NEXT: pushl %esi
|
|
; CHECK-NEXT: calll fn3@PLT
|
|
; CHECK-NEXT: addl $16, %esp
|
|
; CHECK-NEXT: xorl %eax, %eax
|
|
; CHECK-NEXT: testl %esi, %esi
|
|
; CHECK-NEXT: setle %al
|
|
; CHECK-NEXT: addl $8, %esp
|
|
; CHECK-NEXT: popl %esi
|
|
; CHECK-NEXT: retl
|
|
entry:
|
|
; The shift amount comes from the global @d.
%0 = load i32, ptr @d, align 4
|
|
; Arithmetic shift right of the incoming argument by the loaded amount.
%shr = ashr i32 %i, %0
|
|
; Argument order: the constants 2 and 5 first, then the shifted value and %i.
tail call fastcc void @fn3(i32 2, i32 5, i32 %shr, i32 %i)
|
|
; Signed compare of the shifted value against 1; %shr is still needed after
; the call.
%cmp = icmp slt i32 %shr, 1
|
|
; Widen the i1 comparison result to the i32 return value.
%. = zext i1 %cmp to i32
|
|
ret i32 %.
|
|
}
|
|
|
|
; External function called from @fn4 with four i32 arguments.
; NOTE(review): declared here with the default calling convention, while the
; call site in @fn4 uses fastcc -- confirm the mismatch is intentional for this
; test.
declare void @fn3(i32 %p1, i32 %p2, i32 %p3, i32 %p4) #0
|
|
|
|
; Attribute group #0 (nounwind), referenced by @fn4 and the @fn3 declaration.
attributes #0 = { nounwind }
|