
The first attempt at this change missed updating the test files for the tools (update_llc_test_checks.py). Original commit message:

This implements the main suggested change from issue #56498. Using the shorter (non-extending) instruction only at -Oz ("minsize") rather than at -Os ("optsize") is left as a possible follow-up.

As noted in the bug report, the zero-extending load may have shorter latency and better throughput across a wide range of x86 micro-arches, and it avoids a potential false dependency. The cost is one extra instruction byte.

This could cause perf ups and downs from secondary effects, but I don't think it is possible to account for those in advance, and they will likely also depend on the exact micro-arch. This does bring LLVM x86 codegen more in line with existing gcc codegen, so if problems are exposed, they are more likely to occur for both compilers.

Differential Revision: https://reviews.llvm.org/D129775
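To illustrate the change in isolation, here is a minimal sketch (a hypothetical function, not part of the patch or the test below; the exact assembly depends on the llc version and target CPU): a byte load whose result is only consumed as i8.

; Hypothetical example, not from the patch: a byte load used only as i8.
define i8 @load_byte(ptr %p) {
  %b = load i8, ptr %p, align 1
  ret i8 %b
}
; before: movb   (%rdi), %al    ; writes only the low byte of %eax, so it
;                               ; can carry a false dependency on the
;                               ; register's previous value
; after:  movzbl (%rdi), %eax   ; zero-extending load writes the full
;                               ; register, at the cost of one extra
;                               ; encoding byte

In the test below, the same transform is visible in the "movzbl (%rsi,%rdx), %eax" load that feeds the xorb.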
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-none-eabi -o - | FileCheck %s

define zeroext i1 @bigger(ptr nocapture readonly %c, ptr nocapture readonly %e, i64 %d, i64 %p1) {
; CHECK-LABEL: bigger:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andb $7, %cl
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    subb %cl, %al
; CHECK-NEXT:    movl $5, %r8d
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    shll %cl, %r8d
; CHECK-NEXT:    movzbl (%rsi,%rdx), %eax
; CHECK-NEXT:    xorb (%rdi,%rdx), %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    andl %r8d, %eax
; CHECK-NEXT:    testb $-1, %al
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    retq
entry:
  %0 = trunc i64 %p1 to i16
  %1 = and i16 %0, 7
  %sh_prom = sub nuw nsw i16 8, %1
  %shl = shl nuw nsw i16 5, %sh_prom
  %arrayidx = getelementptr inbounds i8, ptr %c, i64 %d
  %2 = load i8, ptr %arrayidx, align 1
  %3 = and i16 %shl, 255
  %conv2 = zext i16 %3 to i32
  %arrayidx3 = getelementptr inbounds i8, ptr %e, i64 %d
  %4 = load i8, ptr %arrayidx3, align 1
  %5 = xor i8 %4, %2
  %6 = zext i8 %5 to i32
  %7 = and i32 %6, %conv2
  %cmp.not = icmp eq i32 %7, 0
  ret i1 %cmp.not
}
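If codegen changes again, the CHECK lines above should be regenerated rather than edited by hand. A sketch of the usual invocation, assuming an LLVM checkout with a built llc (paths vary by setup):

  llvm/utils/update_llc_test_checks.py --llc-binary=<build>/bin/llc <path-to-this-test>.ll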