LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z

https://alive2.llvm.org/ce/z/QmR9rR

This is a reassociation + factoring fold. The common shift operation is moved after a bitwise logic op on 2 input operands. We get simpler cases of these patterns in IR, but I suspect we would miss all of these exact tests in IR too. We also handle the simpler form of this plus several other folds in DAGCombiner::hoistLogicOpWithSameOpcodeHands().

This is a partial implementation of a transform suggested in D111530 (only handles 'or' bitwise logic as a first step - need to stamp out more tests for other opcodes). Several of the same tests added for D111530 are altered here (but not fully optimized). I'm not sure yet if this would help/hinder that patch, but this should be an improvement for all tests added with ecf606cb4329ae since it removes a shift operation in those examples.

Differential Revision: https://reviews.llvm.org/D120516
309 lines
9.0 KiB
LLVM
309 lines
9.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64

; Optimize expanded SRL/SHL used as an input of
; SETCC comparing it with zero by removing rotation.
;
; See https://bugs.llvm.org/show_bug.cgi?id=50197
; Loop test: %inc is incremented by 1 each iteration and the loop exits once
; %inc is unsigned-less-than 1152921504606846976 (2^60); %inc is returned.
; In the x86_64 run the expected loop body tests the condition with one or
; plus one shrdq $60 before the jne.
define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-LABEL: opt_setcc_lt_power_of_2:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB0_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: addl $1, %edi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: shldl $4, %edx, %ebx
; X86-NEXT: movl %esi, %ebp
; X86-NEXT: orl %ecx, %ebp
; X86-NEXT: shrdl $28, %edx, %ebp
; X86-NEXT: orl %ebx, %ebp
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %exit
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: opt_setcc_lt_power_of_2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %loop
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: addq $1, %rax
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: orq %rdx, %rcx
; X64-NEXT: shrdq $60, %rdx, %rcx
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
  br label %loop

loop:
  %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
  %inc = add i128 %phi.a, 1
  %cmp = icmp ult i128 %inc, 1152921504606846976
  br i1 %cmp, label %exit, label %loop

exit:
  ret i128 %inc
}

; Returns true when (%a lshr 17) is zero.
; In the x86_64 run the expected code is one or, one shrdq $17 and sete.
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shrdl $17, %ecx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: shldl $15, %edx, %esi
; X86-NEXT: orl %esi, %eax
; X86-NEXT: shrdl $17, %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
  %srl = lshr i128 %a, 17
  %cmp = icmp eq i128 %srl, 0
  ret i1 %cmp
}

; Returns true when (%a lshr 17) is non-zero.
; In the x86_64 run the expected code is one or, one shrdq $17 and setne.
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shrdl $17, %ecx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: shldl $15, %edx, %esi
; X86-NEXT: orl %esi, %eax
; X86-NEXT: shrdl $17, %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: setne %al
; X64-NEXT: retq
  %srl = lshr i128 %a, 17
  %cmp = icmp ne i128 %srl, 0
  ret i1 %cmp
}

; Returns true when (%a shl 17) is zero.
; In the x86_64 run the expected code is one or, one shldq $17 and sete.
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
  %shl = shl i128 %a, 17
  %cmp = icmp eq i128 %shl, 0
  ret i1 %cmp
}

; Returns true when (%a shl 17) is non-zero.
; In the x86_64 run the expected code is one or, one shldq $17 and setne.
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: setne %al
; X64-NEXT: retq
  %shl = shl i128 %a, 17
  %cmp = icmp ne i128 %shl, 0
  ret i1 %cmp
}

; Negative test: optimization should not be applied if shift has multiple users.
; Here %shl feeds both the icmp and the call to @use, and the expected output
; still computes every part of the shifted value before comparing.
define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %esi, %edx
; X86-NEXT: shldl $17, %ecx, %esi
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: shll $17, %eax
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: orl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: orl %edi, %ebx
; X86-NEXT: sete %bl
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: shlq $17, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: orq %rsi, %rax
; X64-NEXT: sete %bl
; X64-NEXT: callq use@PLT
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
  %shl = shl i128 %a, 17
  %cmp = icmp eq i128 %shl, 0
  call void @use(i128 %shl)
  ret i1 %cmp
}

; Check that the optimization is applied to a DAG having the appropriate shape
; even if there was no actual shift expansion.
; The IR spells out (%a shl 17) | (%b lshr 47) | (%b shl 17) by hand; the two
; shift amounts applied to %b sum to the i64 bit width (17 + 47 = 64).
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shldq $17, %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
  %shl.a = shl i64 %a, 17
  %srl.b = lshr i64 %b, 47
  %or.0 = or i64 %shl.a, %srl.b
  %shl.b = shl i64 %b, 17
  %or.1 = or i64 %or.0, %shl.b
  %cmp = icmp eq i64 %or.1, 0
  ret i1 %cmp
}

; Negative test: optimization should not be applied as
; constants used in shifts do not match.
; %b is shifted left by 18 while %a is shifted left by 17, and the expected
; output keeps two distinct shift amounts ($17 and $18).
define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $18, %eax, %ecx
; X86-NEXT: shll $18, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X64: # %bb.0:
; X64-NEXT: shldq $17, %rsi, %rdi
; X64-NEXT: shlq $18, %rsi
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
  %shl.a = shl i64 %a, 17
  %srl.b = lshr i64 %b, 47
  %or.0 = or i64 %shl.a, %srl.b
  %shl.b = shl i64 %b, 18
  %or.1 = or i64 %or.0, %shl.b
  %cmp = icmp eq i64 %or.1, 0
  ret i1 %cmp
}

; External function with no body in this file; called by
; @opt_setcc_shl_eq_zero_multiple_shl_users to give the shift a second user.
declare void @use(i128 %a)