
; This test was added with D129775 (issue #56498): prefer a zero-extending
; load (movzbl) over the shorter non-extending byte load under -Os, since the
; zero-extending form may have shorter latency / better throughput across a
; wide range of x86 micro-arches and avoids a potential false dependency, at
; the cost of one extra instruction byte.
; RUN: llc < %s -mtriple=i686-- -mcpu=atom | FileCheck %s
; CHECK: movl
; CHECK: movzbl
; CHECK: movzbl
; CHECK: cmpb
; CHECK: notb
; CHECK: notb

; Test for checking of cancel conversion to cmp32 in Atom case
; in function 'X86TargetLowering::EmitCmp'
|
; Load two adjacent bytes, bitwise-not both, and return the difference of the
; complemented values, ordered so the result is (not smaller) - (not larger)
; when %ld2 < %ld1 (unsigned) and the reverse otherwise.
define i8 @run_test(ptr %rd_p) {
entry:
  %incdec.ptr = getelementptr inbounds i8, ptr %rd_p, i64 1
  %ld1 = load i8, ptr %rd_p, align 1
  ; NOTE(review): %incdec.ptr1 has no users; kept as-is since this is a
  ; codegen test and the IR is intentionally preserved verbatim.
  %incdec.ptr1 = getelementptr inbounds i8, ptr %rd_p, i64 2
  %ld2 = load i8, ptr %incdec.ptr, align 1
  ; xor with -1 is bitwise NOT (lowers to notb per the CHECK lines above).
  %x4 = xor i8 %ld1, -1
  %x5 = xor i8 %ld2, -1
  ; Unsigned 8-bit compare (cmpb) — the test verifies this is NOT widened
  ; to a 32-bit compare on Atom.
  %cmp34 = icmp ult i8 %ld2, %ld1
  br i1 %cmp34, label %if.then3, label %if.else

if.then3:
  %sub7 = sub i8 %x4, %x5
  br label %if.end4

if.else:
  %sub8 = sub i8 %x5, %x4
  br label %if.end4

if.end4:
  %res = phi i8 [ %sub7, %if.then3 ], [ %sub8, %if.else ]
  ret i8 %res
}