llvm-project/llvm/test/CodeGen/X86/sbb-false-dep.ll
Sanjay Patel 40a50f8701 [x86] avoid false dependency stall on 'sbb' with same source reg
This is effectively inverting the transform added with D116804
because the downside of the false dependency of something like
"sbb %eax, %eax" is much greater than the upside of eliminating
a zeroing instruction on (all?) Intel CPUs.

Differential Revision: https://reviews.llvm.org/D118843
2022-02-07 10:12:12 -05:00

119 lines
4.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM
; Two-field struct of i64 pointers; the test function loads both fields
; (indices 0 and 1) through a pointer to this type.
%struct.y_s = type { i64*, i64* }
; Codegen test for a pair of complementary selects between a loaded i64 and -1
; (%13 and %14 in the body). In both expected-output blocks each select is
; lowered to
;     mask   = sbb reg, reg      ; 0 or -1, taken from the carry flag
;     result = mask | %12
; where the carry comes from "negl" (set when %4 != 0, feeding %13) and from
; "cmpl $1" (set when %4 == 0, feeding %14).
;
; What differs between the two configurations:
;   * Default attributes (the CHECK prefix): the destination of each sbb is
;     zeroed beforehand, so the sbb does not depend on the register's stale
;     value. One zeroing is "xorl %ebp, %ebp", issued before negl. The other
;     sits between negl and its sbb and is "movl $0, %eax": unlike xor, mov
;     leaves the carry flag produced by negl intact.
;   * With +sbb-dep-breaking (the IDIOM prefix): no zeroing instruction is
;     emitted before either sbb.
;   The two results also land in swapped registers (rax/rbp), so the order of
;   the corresponding pushes before the call to foo2 is swapped as well.
;
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4) nounwind {
; CHECK-LABEL: mallocbench_gs:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %r8d, %r13d
; CHECK-NEXT: movl %ecx, %r14d
; CHECK-NEXT: movl %edx, %r15d
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq %rdi, %r12
; CHECK-NEXT: movq (%rsi), %rdi
; CHECK-NEXT: movq 8(%rsi), %rsi
; CHECK-NEXT: movq %rbx, %rdx
; CHECK-NEXT: callq foo1@PLT
; CHECK-NEXT: movq 8(%rbx), %rax
; CHECK-NEXT: movq (%rax), %rdx
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: movl %r13d, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: orq %rdx, %rax
; CHECK-NEXT: cmpl $1, %r13d
; CHECK-NEXT: sbbq %rbp, %rbp
; CHECK-NEXT: orq %rdx, %rbp
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movq %r12, %rdi
; CHECK-NEXT: movl %r15d, %esi
; CHECK-NEXT: movl %r14d, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: xorl %r9d, %r9d
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: callq foo2@PLT
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
;
; IDIOM-LABEL: mallocbench_gs:
; IDIOM: # %bb.0:
; IDIOM-NEXT: pushq %rbp
; IDIOM-NEXT: pushq %r15
; IDIOM-NEXT: pushq %r14
; IDIOM-NEXT: pushq %r13
; IDIOM-NEXT: pushq %r12
; IDIOM-NEXT: pushq %rbx
; IDIOM-NEXT: pushq %rax
; IDIOM-NEXT: movl %r8d, %r13d
; IDIOM-NEXT: movl %ecx, %r14d
; IDIOM-NEXT: movl %edx, %r15d
; IDIOM-NEXT: movq %rsi, %rbx
; IDIOM-NEXT: movq %rdi, %r12
; IDIOM-NEXT: movq (%rsi), %rdi
; IDIOM-NEXT: movq 8(%rsi), %rsi
; IDIOM-NEXT: movq %rbx, %rdx
; IDIOM-NEXT: callq foo1@PLT
; IDIOM-NEXT: movq 8(%rbx), %rax
; IDIOM-NEXT: movq (%rax), %rdx
; IDIOM-NEXT: movl %r13d, %ecx
; IDIOM-NEXT: negl %ecx
; IDIOM-NEXT: sbbq %rbp, %rbp
; IDIOM-NEXT: orq %rdx, %rbp
; IDIOM-NEXT: cmpl $1, %r13d
; IDIOM-NEXT: sbbq %rax, %rax
; IDIOM-NEXT: orq %rdx, %rax
; IDIOM-NEXT: subq $8, %rsp
; IDIOM-NEXT: movq %r12, %rdi
; IDIOM-NEXT: movl %r15d, %esi
; IDIOM-NEXT: movl %r14d, %edx
; IDIOM-NEXT: xorl %ecx, %ecx
; IDIOM-NEXT: xorl %r8d, %r8d
; IDIOM-NEXT: xorl %r9d, %r9d
; IDIOM-NEXT: pushq %rax
; IDIOM-NEXT: pushq %rbp
; IDIOM-NEXT: pushq %rbx
; IDIOM-NEXT: callq foo2@PLT
; IDIOM-NEXT: addq $40, %rsp
; IDIOM-NEXT: popq %rbx
; IDIOM-NEXT: popq %r12
; IDIOM-NEXT: popq %r13
; IDIOM-NEXT: popq %r14
; IDIOM-NEXT: popq %r15
; IDIOM-NEXT: popq %rbp
; IDIOM-NEXT: retq
; Load both pointer fields of %1 and hand them, together with %1, to foo1.
%6 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 0
%7 = load i64*, i64** %6, align 8
%8 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 1
%9 = load i64*, i64** %8, align 8
tail call void @foo1(i64* noundef %7, i64* noundef %9, %struct.y_s* noundef %1)
; %10 is true when the fifth argument is zero.
%10 = icmp eq i32 %4, 0
; Field 1 is loaded again after the call (instead of reusing %9), then the
; i64 it points to is read.
%11 = load i64*, i64** %8, align 8
%12 = load i64, i64* %11, align 8
; Complementary selects on the same condition: exactly one of %13/%14 is %12
; and the other is -1. These are what become the sbb + or sequences.
%13 = select i1 %10, i64 %12, i64 -1
%14 = select i1 %10, i64 -1, i64 %12
; Nine arguments: the expected assembly passes the first six in registers and
; pushes the last three (%1, %13, %14). The call result is returned as-is.
%15 = tail call noundef i32 @foo2(i32* noundef %0, i32 noundef %2, i32 noundef %3, i32 noundef 0, i32 noundef 0, i32 noundef 0, %struct.y_s* noundef nonnull %1, i64 noundef %13, i64 noundef %14)
ret i32 %15
}
; External callees used by the test function; only their signatures matter.
; NOTE(review): both reference attribute group #1, whose definition is not
; visible in this portion of the file - confirm it exists or is intentionally
; left undefined.
;
; foo1: called first with the two loaded struct fields and the struct pointer.
declare void @foo1(i64* noundef, i64* noundef, %struct.y_s* noundef) local_unnamed_addr #1
; foo2: nine-argument callee. In the expected assembly its last three
; arguments are pushed on the stack, which keeps both select results live in
; registers up to the call.
declare noundef i32 @foo2(i32* noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, %struct.y_s* noundef, i64 noundef, i64 noundef) local_unnamed_addr #1