The motivation is that the update script has at least two deviations (`<...>@GOT`/`<...>@PLT` and not hiding pointer arithmetic) from what pretty much all the check lines were generated with, and most of the tests are still not updated, so each time one of the out-of-date tests is updated to see the effect of a code change, there is a lot of noise. Instead of having to deal with that each time, let's just deal with everything at once. This has been done via: ``` cd llvm-project/llvm/test/CodeGen/X86 grep -rl "; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py" | xargs -L1 <...>/llvm-project/llvm/utils/update_llc_test_checks.py --llc-binary <...>/llvm-project/build/bin/llc ``` Not all tests were regenerated, however.
931 lines
26 KiB
LLVM
931 lines
26 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
|
|
|
|
; External callees shared by the tests in this file. Each of the five
; dso_local helpers takes a single i32.
declare dso_local void @bar(i32)
declare dso_local void @car(i32)
declare dso_local void @dar(i32)
declare dso_local void @ear(i32)
declare dso_local void @far(i32)
; @qux is the only callee here that is not dso_local; the expected assembly
; in this file calls it as qux@PLT.
declare i1 @qux()

; Globals targeted by the store instructions in the tests below.
@GHJK = dso_local global i32 0
@HABC = dso_local global i32 0
|
|
|
|
; BranchFolding should tail-merge the stores since they all precede
; direct branches to the same place.
;
; Blocks %A, %B and %C each call a different helper, store 0 to @GHJK and
; branch to %M. In the expected assembly the store to GHJK appears once,
; at the top of the %M block, instead of once per predecessor.

define dso_local void @tail_merge_me() nounwind {
; CHECK-LABEL: tail_merge_me:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.6: # %A
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_1: # %next
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %B
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: callq car
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_3: # %C
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq dar
; CHECK-NEXT: .LBB0_4: # %M
; CHECK-NEXT: movl $0, GHJK(%rip)
; CHECK-NEXT: movl $1, HABC(%rip)
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_5
; CHECK-NEXT: # %bb.7: # %return
; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
; CHECK-NEXT: callq ear
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_5: # %altret
; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
; CHECK-NEXT: callq far
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
entry:
  %a = call i1 @qux()
  br i1 %a, label %A, label %next
next:
  %b = call i1 @qux()
  br i1 %b, label %B, label %C

A:
  call void @bar(i32 0)
  store i32 0, i32* @GHJK
  br label %M

B:
  call void @car(i32 1)
  store i32 0, i32* @GHJK
  br label %M

C:
  call void @dar(i32 2)
  store i32 0, i32* @GHJK
  br label %M

M:
  store i32 1, i32* @HABC
  %c = call i1 @qux()
  br i1 %c, label %return, label %altret

return:
  call void @ear(i32 1000)
  ret void
altret:
  call void @far(i32 1001)
  ret void
}
|
|
|
|
; External function taking two i8* arguments and returning an i8*; in this
; file its result feeds the indirectbr in @tail_duplicate_me.
declare i8* @choose(i8*, i8*)
|
|
|
|
; BranchFolding should tail-duplicate the indirect jump to avoid
; redundant branching.
;
; Blocks %A, %B and %C all branch to %M, which contains only an indirectbr
; on the pointer returned by @choose. In the expected assembly each of the
; three predecessors ends with its own "jmpq *%r14".

define dso_local void @tail_duplicate_me() nounwind {
; CHECK-LABEL: tail_duplicate_me:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: movl $.Ltmp0, %edi
; CHECK-NEXT: movl $.Ltmp1, %esi
; CHECK-NEXT: movl %eax, %ebx
; CHECK-NEXT: callq choose@PLT
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je .LBB1_1
; CHECK-NEXT: # %bb.7: # %A
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: movl $0, GHJK(%rip)
; CHECK-NEXT: jmpq *%r14
; CHECK-NEXT: .Ltmp0: # Block address taken
; CHECK-NEXT: .LBB1_4: # %return
; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
; CHECK-NEXT: callq ear
; CHECK-NEXT: jmp .LBB1_5
; CHECK-NEXT: .LBB1_1: # %next
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB1_3
; CHECK-NEXT: # %bb.2: # %B
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: callq car
; CHECK-NEXT: movl $0, GHJK(%rip)
; CHECK-NEXT: jmpq *%r14
; CHECK-NEXT: .Ltmp1: # Block address taken
; CHECK-NEXT: .LBB1_6: # %altret
; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
; CHECK-NEXT: callq far
; CHECK-NEXT: .LBB1_5: # %return
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB1_3: # %C
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq dar
; CHECK-NEXT: movl $0, GHJK(%rip)
; CHECK-NEXT: jmpq *%r14
entry:
  %a = call i1 @qux()
  %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
                        i8* blockaddress(@tail_duplicate_me, %altret))
  br i1 %a, label %A, label %next
next:
  %b = call i1 @qux()
  br i1 %b, label %B, label %C

A:
  call void @bar(i32 0)
  store i32 0, i32* @GHJK
  br label %M

B:
  call void @car(i32 1)
  store i32 0, i32* @GHJK
  br label %M

C:
  call void @dar(i32 2)
  store i32 0, i32* @GHJK
  br label %M

M:
  indirectbr i8* %c, [label %return, label %altret]

return:
  call void @ear(i32 1000)
  ret void
altret:
  call void @far(i32 1001)
  ret void
}
|
|
|
|
; BranchFolding shouldn't try to merge the tails of two blocks
; with only a branch in common, regardless of the fallthrough situation.
;
; The function loads the floats at indices 2, 4 and 6 of %result. Blocks %bb
; and %bb21 each hold one fcmp followed by a conditional branch to
; %bb26/%bb30. In the expected assembly both blocks keep their own ucomiss
; and conditional jump.

define i1 @dont_merge_oddly(float* %result) nounwind {
; CHECK-LABEL: dont_merge_oddly:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm1, %xmm2
; CHECK-NEXT: jbe .LBB2_3
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: ja .LBB2_4
; CHECK-NEXT: .LBB2_2: # %bb30
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB2_3: # %bb21
; CHECK-NEXT: ucomiss %xmm0, %xmm2
; CHECK-NEXT: jbe .LBB2_2
; CHECK-NEXT: .LBB2_4: # %bb26
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
entry:
  %tmp4 = getelementptr float, float* %result, i32 2
  %tmp5 = load float, float* %tmp4, align 4
  %tmp7 = getelementptr float, float* %result, i32 4
  %tmp8 = load float, float* %tmp7, align 4
  %tmp10 = getelementptr float, float* %result, i32 6
  %tmp11 = load float, float* %tmp10, align 4
  %tmp12 = fcmp olt float %tmp8, %tmp11
  br i1 %tmp12, label %bb, label %bb21

bb:
  %tmp23469 = fcmp olt float %tmp5, %tmp8
  br i1 %tmp23469, label %bb26, label %bb30

bb21:
  %tmp23 = fcmp olt float %tmp5, %tmp11
  br i1 %tmp23, label %bb26, label %bb30

bb26:
  ret i1 0

bb30:
  ret i1 1
}
|
|
|
|
; Do any-size tail-merging when two candidate blocks will both require
; an unconditional jump to complete a two-way conditional branch.
;
; This test only works when register allocation happens to use %rax for both
; load addresses.
;
; Blocks %bb2.i.i2 and %bb2.i3 have the same shape: load a pointer at byte
; offset 8, load the byte at offset 16 through it, then switch on the values
; 16 and 23. In the expected assembly both blocks address their loads through
; %rax.

; Type definitions used by @c_expand_expr_stmt and @lvalue_p.
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] }
%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* }
%union..2anon = type { i32 }
%union.rtunion = type { i8* }
%union.tree_node = type { %struct.tree_decl }

define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
; CHECK-LABEL: c_expand_expr_stmt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movb 0, %bl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.2: # %bb.i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_8
; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
; CHECK-NEXT: movq 0, %rax
; CHECK-NEXT: movzbl (%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_10
; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.5: # %bb.i1
; CHECK-NEXT: movq 32(%rax), %rax
; CHECK-NEXT: movzbl 16(%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_13
; CHECK-NEXT: # %bb.6: # %bb.i1
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.7: # %bb.i.i
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq lvalue_p@PLT
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: setne %al
; CHECK-NEXT: jmp .LBB3_16
; CHECK-NEXT: .LBB3_8: # %bb1
; CHECK-NEXT: cmpb $23, %bl
; CHECK-NEXT: .LBB3_9: # %bb3
; CHECK-NEXT: .LBB3_15:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: .LBB3_10: # %bb2.i3
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.11: # %bb2.i3
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
; CHECK-NEXT: .LBB3_13: # %bb2.i.i2
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.14: # %bb2.i.i2
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
entry:
  %tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3]
  switch i8 %tmp4, label %bb3 [
    i8 18, label %bb
  ]

bb: ; preds = %entry
  switch i32 undef, label %bb1 [
    i32 0, label %bb2.i
    i32 37, label %bb.i
  ]

bb.i: ; preds = %bb
  switch i32 undef, label %bb1 [
    i32 0, label %lvalue_p.exit
  ]

bb2.i: ; preds = %bb
  br label %bb3

lvalue_p.exit: ; preds = %bb.i
  %tmp21 = load %union.tree_node*, %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3]
  %tmp22 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
  %tmp23 = load i8, i8* %tmp22, align 8 ; <i8> [#uses=1]
  %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1]
  switch i32 %tmp24, label %lvalue_p.exit4 [
    i32 0, label %bb2.i3
    i32 2, label %bb.i1
  ]

bb.i1: ; preds = %lvalue_p.exit
  %tmp25 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
  %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp27 = load %union.tree_node*, %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
  %tmp28 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp29 = load i8, i8* %tmp28, align 8 ; <i8> [#uses=1]
  %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1]
  switch i32 %tmp30, label %lvalue_p.exit4 [
    i32 0, label %bb2.i.i2
    i32 2, label %bb.i.i
  ]

bb.i.i: ; preds = %bb.i1
  %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1]
  %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1]
  br label %lvalue_p.exit4

bb2.i.i2: ; preds = %bb.i1
  %tmp35 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
  %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp37 = load %union.tree_node*, %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
  %tmp38 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp39 = load i8, i8* %tmp38, align 8 ; <i8> [#uses=1]
  switch i8 %tmp39, label %bb2 [
    i8 16, label %lvalue_p.exit4
    i8 23, label %lvalue_p.exit4
  ]

bb2.i3: ; preds = %lvalue_p.exit
  %tmp40 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
  %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp42 = load %union.tree_node*, %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
  %tmp43 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp44 = load i8, i8* %tmp43, align 8 ; <i8> [#uses=1]
  switch i8 %tmp44, label %bb2 [
    i8 16, label %lvalue_p.exit4
    i8 23, label %lvalue_p.exit4
  ]

lvalue_p.exit4: ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit
  %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1]
  %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1]
  %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1]
  br i1 %or.cond, label %bb2, label %bb3

bb1: ; preds = %bb.i, %bb
  %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1]
  br i1 %.old, label %bb2, label %bb3

bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2
  br label %bb3

bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry
  %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0]
  unreachable
}
|
|
|
|
; Called from block %bb.i.i of @c_expand_expr_stmt.
declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly

; Declared only; no function visible in this file calls it.
declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
|
|
|
|
|
|
; If one tail merging candidate falls through into the other,
; tail merging is likely profitable regardless of how few
; instructions are involved. This function should have only
; one ret instruction.
;
; %bb calls @func and returns; %return only returns. In the expected
; assembly %bb falls through into the single retq of %return.

define dso_local void @foo(i1* %V) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: je .LBB4_2
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq func
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB4_2: # %return
; CHECK-NEXT: retq
entry:
  %t0 = icmp eq i1* %V, null
  br i1 %t0, label %return, label %bb

bb:
  call void @func()
  ret void

return:
  ret void
}
|
|
|
|
; External callee with no arguments and no result; called from @foo and
; @bfi_new_block_pgso in this file.
declare dso_local void @func()
|
|
|
|
; one - One instruction may be tail-duplicated even with optsize.
;
; %bb7 and %bb12 are identical: a tail call to @tail_call_me followed by
; ret. The expected assembly contains two jumps to tail_call_me, an
; unconditional one in %bb7 and a conditional one at the end of %bbx.

; Global written by the volatile stores in the @two* tests.
@XYZ = external dso_local global i32

; Tail-call target shared by @one, @one_pgso, @two_nosize and
; @bfi_new_block_pgso.
declare dso_local void @tail_call_me()

define dso_local void @one(i32 %v) nounwind optsize {
; CHECK-LABEL: one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB5_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB5_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB5_3: # %bbx
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne tail_call_me # TAILCALL
; CHECK-NEXT: .LBB5_4: # %return
; CHECK-NEXT: retq
entry:
  %0 = icmp eq i32 %v, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 %v, label %bb7 [
    i32 16, label %return
  ]

bb7:
  tail call void @tail_call_me()
  ret void

bbx:
  switch i32 %v, label %bb12 [
    i32 128, label %return
  ]

bb12:
  tail call void @tail_call_me()
  ret void

return:
  ret void
}
|
|
|
|
; one_pgso - Same body as @one, but the function carries !prof !14
; (function_entry_count of 0) instead of the optsize attribute. The expected
; assembly matches that of @one apart from the label numbers.
define dso_local void @one_pgso(i32 %v) nounwind !prof !14 {
; CHECK-LABEL: one_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB6_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB6_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB6_3: # %bbx
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne tail_call_me # TAILCALL
; CHECK-NEXT: .LBB6_4: # %return
; CHECK-NEXT: retq
entry:
  %0 = icmp eq i32 %v, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 %v, label %bb7 [
    i32 16, label %return
  ]

bb7:
  tail call void @tail_call_me()
  ret void

bbx:
  switch i32 %v, label %bb12 [
    i32 128, label %return
  ]

bb12:
  tail call void @tail_call_me()
  ret void

return:
  ret void
}
|
|
|
|
; two - Same as one, but with two instructions in the common
; tail instead of one. This is too much to be merged, given
; the optsize attribute.
;
; All branch conditions here are computed from undef. %bb7 and %bb12 are
; identical: two volatile stores to @XYZ followed by unreachable. The
; expected assembly contains a single copy of the two stores, labelled %bb7.

define dso_local void @two() nounwind optsize {
; CHECK-LABEL: two:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB7_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB7_1: # %bb7
; CHECK-NEXT: movl $0, XYZ(%rip)
; CHECK-NEXT: movl $1, XYZ(%rip)
entry:
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 undef, label %bb7 [
    i32 16, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

bbx:
  switch i32 undef, label %bb12 [
    i32 128, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

return:
  ret void
}
|
|
|
|
; two_pgso - Same body as @two, but the function carries !prof !14
; (function_entry_count of 0) instead of the optsize attribute. The expected
; assembly matches that of @two apart from the label numbers.
define dso_local void @two_pgso() nounwind !prof !14 {
; CHECK-LABEL: two_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB8_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB8_1: # %bb7
; CHECK-NEXT: movl $0, XYZ(%rip)
; CHECK-NEXT: movl $1, XYZ(%rip)
entry:
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 undef, label %bb7 [
    i32 16, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

bbx:
  switch i32 undef, label %bb12 [
    i32 128, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

return:
  ret void
}
|
|
|
|
; two_minsize - Same as two, but with minsize instead of optsize.
;
; The expected assembly matches that of @two apart from the label numbers.

define dso_local void @two_minsize() nounwind minsize {
; CHECK-LABEL: two_minsize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB9_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB9_1: # %bb7
; CHECK-NEXT: movl $0, XYZ(%rip)
; CHECK-NEXT: movl $1, XYZ(%rip)
entry:
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 undef, label %bb7 [
    i32 16, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

bbx:
  switch i32 undef, label %bb12 [
    i32 128, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

return:
  ret void
}
|
|
|
|
; two_nosize - Same as two, but without the optsize attribute.
; Now two instructions are enough to be tail-duplicated.
;
; %bb7 and %bb12 are identical: a volatile store of 0 to @XYZ followed by a
; tail call to @tail_call_me. In the expected assembly each block keeps its
; own store and its own jump to tail_call_me.

define dso_local void @two_nosize(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: two_nosize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB10_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je .LBB10_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: movl $0, XYZ(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB10_3: # %bbx
; CHECK-NEXT: cmpl $-1, %edx
; CHECK-NEXT: je .LBB10_4
; CHECK-NEXT: # %bb.5: # %bb12
; CHECK-NEXT: movl $0, XYZ(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB10_4: # %return
; CHECK-NEXT: retq
entry:
  %0 = icmp eq i32 %x, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 %y, label %bb7 [
    i32 0, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  tail call void @tail_call_me()
  ret void

bbx:
  switch i32 %z, label %bb12 [
    i32 -1, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  tail call void @tail_call_me()
  ret void

return:
  ret void
}
|
|
|
|
; Tail-merging should merge the two ret instructions since one side
; can fall-through into the ret and the other side has to branch anyway.
;
; %bb.nph returns %paraml * %parami; %for.end returns 1 when %parami < 1 and
; %parami otherwise. In the expected assembly %bb.nph falls through into the
; single retq of %for.end.

define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
; CHECK-LABEL: TESTE:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: cmovgq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: jle .LBB11_2
; CHECK-NEXT: # %bb.1: # %bb.nph
; CHECK-NEXT: imulq %rdi, %rsi
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .LBB11_2: # %for.end
; CHECK-NEXT: retq
entry:
  %cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
  %varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1]
  %cmp410 = icmp slt i64 %paraml, 1 ; <i1> [#uses=1]
  br i1 %cmp410, label %for.end, label %bb.nph

bb.nph: ; preds = %entry
  %tmp15 = mul i64 %paraml, %parami ; <i64> [#uses=1]
  ret i64 %tmp15

for.end: ; preds = %entry
  ret i64 %varx.0
}
|
|
|
|
; We should tail merge small blocks that don't end in a tail call or return
; instruction. Those blocks are typically unreachable and will be placed
; out-of-line after the main return, so we should try to eliminate as many of
; them as possible.
;
; %abort1 through %abort4 are identical: a call to @abort followed by
; unreachable. In the expected assembly all four conditional jumps target one
; shared block (labelled %abort1) containing a single call to abort.

declare dso_local void @abort()
define dso_local void @merge_aborts() {
; CHECK-LABEL: merge_aborts:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB12_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
entry:
  %c1 = call i1 @qux()
  br i1 %c1, label %cont1, label %abort1
abort1:
  call void @abort()
  unreachable
cont1:
  %c2 = call i1 @qux()
  br i1 %c2, label %cont2, label %abort2
abort2:
  call void @abort()
  unreachable
cont2:
  %c3 = call i1 @qux()
  br i1 %c3, label %cont3, label %abort3
abort3:
  call void @abort()
  unreachable
cont3:
  %c4 = call i1 @qux()
  br i1 %c4, label %cont4, label %abort4
abort4:
  call void @abort()
  unreachable
cont4:
  ret void
}
|
|
|
|
; Use alternating abort functions so that the blocks we wish to merge are not
; layout successors during branch folding.
;
; %abort1 and %abort3 call @abort; %abort2 and %abort4 call @alt_abort. In
; the expected assembly there is one shared block per callee: one labelled
; %abort1 calling abort and one labelled %abort2 calling alt_abort.

declare dso_local void @alt_abort()

define dso_local void @merge_alternating_aborts() {
; CHECK-LABEL: merge_alternating_aborts:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_6
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux@PLT
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_6
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB13_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
; CHECK-NEXT: .LBB13_6: # %abort2
; CHECK-NEXT: callq alt_abort
entry:
  %c1 = call i1 @qux()
  br i1 %c1, label %cont1, label %abort1
abort1:
  call void @abort()
  unreachable
cont1:
  %c2 = call i1 @qux()
  br i1 %c2, label %cont2, label %abort2
abort2:
  call void @alt_abort()
  unreachable
cont2:
  %c3 = call i1 @qux()
  br i1 %c3, label %cont3, label %abort3
abort3:
  call void @abort()
  unreachable
cont3:
  %c4 = call i1 @qux()
  br i1 %c4, label %cont4, label %abort4
abort4:
  call void @alt_abort()
  unreachable
cont4:
  ret void
}
|
|
|
|
; This triggers a situation where a new block is created (bb4 is split) and
; would then be passed to the PGSO interface llvm::shouldOptimizeForSize().
;
; %bb2 and %bb3 both call @func and branch to %bb4; %bb4 and %bb6 both tail
; call @tail_call_me. In the expected assembly two separate blocks carry the
; %bb4 label.
@GV = dso_local global i32 0
define dso_local void @bfi_new_block_pgso(i32 %c) nounwind {
; CHECK-LABEL: bfi_new_block_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB14_4
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB14_6
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: cmpl $17, %edi
; CHECK-NEXT: je .LBB14_7
; CHECK-NEXT: # %bb.3: # %bb4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB14_4: # %bb5
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne .LBB14_8
; CHECK-NEXT: # %bb.5: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB14_6: # %bb3
; CHECK-NEXT: movl $0, GV(%rip)
; CHECK-NEXT: .LBB14_7: # %bb4
; CHECK-NEXT: callq func
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB14_8: # %bb6
; CHECK-NEXT: jmp tail_call_me # TAILCALL
entry:
  %0 = icmp eq i32 %c, 0
  br i1 %0, label %bb5, label %bb1

bb1:
  switch i32 %c, label %bb4 [
    i32 16, label %bb3
    i32 17, label %bb2
  ]

bb2:
  call void @func()
  br label %bb4

bb3:
  store i32 0, i32* @GV
  call void @func()
  br label %bb4

bb4:
  tail call void @tail_call_me()
  br label %return

bb5:
  switch i32 %c, label %bb6 [
    i32 128, label %return
  ]

bb6:
  tail call void @tail_call_me()
  br label %return

return:
  ret void
}
|
|
|
|
; Module-level profile summary, attached through the module flags.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Function entry count of 0; referenced as !prof by @one_pgso and @two_pgso.
!14 = !{!"function_entry_count", i64 0}
|