This is effectively inverting the transform added with D116804 because the downside of the false dependency of something like "sbb %eax, %eax" is much greater than the upside of eliminating a zeroing instruction on (all?) Intel CPUs. Differential Revision: https://reviews.llvm.org/D118843
119 lines
4.2 KiB
LLVM
119 lines
4.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM

; The two RUN lines compile the same IR for x86-64 twice: once with the default
; subtarget features and once with the "sbb-dep-breaking" feature enabled, so
; the generated code for "sbb reg, reg" can be compared between both settings.

; Pair of i64 pointers; the test function loads both fields.
%struct.y_s = type { i64*, i64* }

; @mallocbench_gs
;
; Produces two complementary selects between a loaded i64 and the constant -1,
; both keyed on "%4 == 0", and passes them as the last two (stack) arguments of
; @foo2. In the expected assembly below each select is lowered to a
; flag-setting instruction followed by "sbbq %reg, %reg" and "orq":
;
;   * Default run: the destination of every sbbq is zeroed first. One zeroing
;     uses "xorl" and is placed before the flag-producing "negl"; the other
;     uses "movl $0", which sits between "negl" and "sbbq" (a mov does not
;     modify the flags that sbbq consumes).
;   * Run with +sbb-dep-breaking: the zeroing instructions are absent and the
;     sbbq is emitted on its own.
;
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4) nounwind {
; CHECK-LABEL: mallocbench_gs:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %r8d, %r13d
; CHECK-NEXT: movl %ecx, %r14d
; CHECK-NEXT: movl %edx, %r15d
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq %rdi, %r12
; CHECK-NEXT: movq (%rsi), %rdi
; CHECK-NEXT: movq 8(%rsi), %rsi
; CHECK-NEXT: movq %rbx, %rdx
; CHECK-NEXT: callq foo1@PLT
; CHECK-NEXT: movq 8(%rbx), %rax
; CHECK-NEXT: movq (%rax), %rdx
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: movl %r13d, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: orq %rdx, %rax
; CHECK-NEXT: cmpl $1, %r13d
; CHECK-NEXT: sbbq %rbp, %rbp
; CHECK-NEXT: orq %rdx, %rbp
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movq %r12, %rdi
; CHECK-NEXT: movl %r15d, %esi
; CHECK-NEXT: movl %r14d, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: xorl %r9d, %r9d
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: callq foo2@PLT
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
;
; IDIOM-LABEL: mallocbench_gs:
; IDIOM: # %bb.0:
; IDIOM-NEXT: pushq %rbp
; IDIOM-NEXT: pushq %r15
; IDIOM-NEXT: pushq %r14
; IDIOM-NEXT: pushq %r13
; IDIOM-NEXT: pushq %r12
; IDIOM-NEXT: pushq %rbx
; IDIOM-NEXT: pushq %rax
; IDIOM-NEXT: movl %r8d, %r13d
; IDIOM-NEXT: movl %ecx, %r14d
; IDIOM-NEXT: movl %edx, %r15d
; IDIOM-NEXT: movq %rsi, %rbx
; IDIOM-NEXT: movq %rdi, %r12
; IDIOM-NEXT: movq (%rsi), %rdi
; IDIOM-NEXT: movq 8(%rsi), %rsi
; IDIOM-NEXT: movq %rbx, %rdx
; IDIOM-NEXT: callq foo1@PLT
; IDIOM-NEXT: movq 8(%rbx), %rax
; IDIOM-NEXT: movq (%rax), %rdx
; IDIOM-NEXT: movl %r13d, %ecx
; IDIOM-NEXT: negl %ecx
; IDIOM-NEXT: sbbq %rbp, %rbp
; IDIOM-NEXT: orq %rdx, %rbp
; IDIOM-NEXT: cmpl $1, %r13d
; IDIOM-NEXT: sbbq %rax, %rax
; IDIOM-NEXT: orq %rdx, %rax
; IDIOM-NEXT: subq $8, %rsp
; IDIOM-NEXT: movq %r12, %rdi
; IDIOM-NEXT: movl %r15d, %esi
; IDIOM-NEXT: movl %r14d, %edx
; IDIOM-NEXT: xorl %ecx, %ecx
; IDIOM-NEXT: xorl %r8d, %r8d
; IDIOM-NEXT: xorl %r9d, %r9d
; IDIOM-NEXT: pushq %rax
; IDIOM-NEXT: pushq %rbp
; IDIOM-NEXT: pushq %rbx
; IDIOM-NEXT: callq foo2@PLT
; IDIOM-NEXT: addq $40, %rsp
; IDIOM-NEXT: popq %rbx
; IDIOM-NEXT: popq %r12
; IDIOM-NEXT: popq %r13
; IDIOM-NEXT: popq %r14
; IDIOM-NEXT: popq %r15
; IDIOM-NEXT: popq %rbp
; IDIOM-NEXT: retq
  ; Load both pointer fields of %1 and hand them, together with %1, to @foo1.
  %6 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 0
  %7 = load i64*, i64** %6, align 8
  %8 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 1
  %9 = load i64*, i64** %8, align 8
  tail call void @foo1(i64* noundef %7, i64* noundef %9, %struct.y_s* noundef %1)
  ; The condition shared by both selects: is the fifth argument zero?
  %10 = icmp eq i32 %4, 0
  ; The second field is loaded again after the call, then dereferenced.
  %11 = load i64*, i64** %8, align 8
  %12 = load i64, i64* %11, align 8
  ; %13 = (%4 == 0) ? %12 : -1   -- the negl/sbbq/orq sequence above
  %13 = select i1 %10, i64 %12, i64 -1
  ; %14 = (%4 == 0) ? -1 : %12   -- the "cmpl $1"/sbbq/orq sequence above
  %14 = select i1 %10, i64 -1, i64 %12
  ; Nine arguments: the last three (%1, %13, %14) are the ones pushed on the
  ; stack in the expected assembly.
  %15 = tail call noundef i32 @foo2(i32* noundef %0, i32 noundef %2, i32 noundef %3, i32 noundef 0, i32 noundef 0, i32 noundef 0, %struct.y_s* noundef nonnull %1, i64 noundef %13, i64 noundef %14)
  ret i32 %15
}

; External callees of @mallocbench_gs. They are only declared here, so the
; calls to them remain real calls (foo1@PLT / foo2@PLT) in the expected output.
; NOTE(review): attribute group #1 is referenced below but no definition of it
; is visible in this file -- confirm whether it is intentionally left undefined.
declare void @foo1(i64* noundef, i64* noundef, %struct.y_s* noundef) local_unnamed_addr #1

declare noundef i32 @foo2(i32* noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, %struct.y_s* noundef, i64 noundef, i64 noundef) local_unnamed_addr #1