llvm-project/llvm/test/CodeGen/X86/icmp-shift-opt.ll
Sanjay Patel acb96ffd14 [SDAG] fold bitwise logic with shifted operands
LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z

https://alive2.llvm.org/ce/z/QmR9rR

This is a reassociation + factoring fold. The common shift operation is moved
after a bitwise logic op on 2 input operands.
We get simpler cases of these patterns in IR, but I suspect we would miss all
of these exact tests in IR too. We also handle the simpler form of this plus
several other folds in DAGCombiner::hoistLogicOpWithSameOpcodeHands().

This is a partial implementation of a transform suggested in D111530
(only handles 'or' bitwise logic as a first step - need to stamp out more
tests for other opcodes).
Several of the same tests added for D111530 are altered here (but not
fully optimized). I'm not sure yet if this would help/hinder that patch,
but this should be an improvement for all tests added with ecf606cb4329ae
since it removes a shift operation in those examples.

Differential Revision: https://reviews.llvm.org/D120516
2022-02-27 09:54:12 -05:00

309 lines
9.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64
; Optimize an expanded SRL/SHL that is used as an input of a SETCC
; comparing it with zero by removing the rotation.
;
; See https://bugs.llvm.org/show_bug.cgi?id=50197
; Loop test: starting from %a, repeatedly add 1 to an i128 value and leave the
; loop once the incremented value is unsigned-less-than 1152921504606846976.
; The incremented value is returned. The assertion lines inside the function
; record the expected llc output for each of the two triples.
define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-LABEL: opt_setcc_lt_power_of_2:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB0_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: addl $1, %edi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: shldl $4, %edx, %ebx
; X86-NEXT: movl %esi, %ebp
; X86-NEXT: orl %ecx, %ebp
; X86-NEXT: shrdl $28, %edx, %ebp
; X86-NEXT: orl %ebx, %ebp
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %exit
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: opt_setcc_lt_power_of_2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %loop
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: addq $1, %rax
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: orq %rdx, %rcx
; X64-NEXT: shrdq $60, %rdx, %rcx
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
br label %loop
loop:
; %phi.a is %a on entry and the previous %inc on every later iteration.
%phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
%inc = add i128 %phi.a, 1
; 1152921504606846976 is 2^60, so this asks whether every bit of %inc from
; bit 60 upwards is zero.
%cmp = icmp ult i128 %inc, 1152921504606846976
br i1 %cmp, label %exit, label %loop
exit:
ret i128 %inc
}
; Returns whether the i128 argument logically shifted right by 17 is equal to
; zero. The shift result %srl is used only by the compare.
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shrdl $17, %ecx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: shldl $15, %edx, %esi
; X86-NEXT: orl %esi, %eax
; X86-NEXT: shrdl $17, %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
%srl = lshr i128 %a, 17
%cmp = icmp eq i128 %srl, 0
ret i1 %cmp
}
; Returns whether the i128 argument logically shifted right by 17 is not equal
; to zero. Same IR as opt_setcc_srl_eq_zero except for the 'ne' predicate; the
; shift result %srl is used only by the compare.
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shrdl $17, %ecx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: shldl $15, %edx, %esi
; X86-NEXT: orl %esi, %eax
; X86-NEXT: shrdl $17, %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: setne %al
; X64-NEXT: retq
%srl = lshr i128 %a, 17
%cmp = icmp ne i128 %srl, 0
ret i1 %cmp
}
; Returns whether the i128 argument shifted left by 17 is equal to zero.
; The shift result %shl is used only by the compare.
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl = shl i128 %a, 17
%cmp = icmp eq i128 %shl, 0
ret i1 %cmp
}
; Returns whether the i128 argument shifted left by 17 is not equal to zero.
; Same IR as opt_setcc_shl_eq_zero except for the 'ne' predicate; the shift
; result %shl is used only by the compare.
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: setne %al
; X64-NEXT: retq
%shl = shl i128 %a, 17
%cmp = icmp ne i128 %shl, 0
ret i1 %cmp
}
; Negative test: the optimization should not be applied when the shift has
; more than one user. Here %shl feeds both the compare against zero and the
; call to @use, so the fully shifted value has to be computed anyway.
define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %esi, %edx
; X86-NEXT: shldl $17, %ecx, %esi
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: shll $17, %eax
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: orl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: orl %edi, %ebx
; X86-NEXT: sete %bl
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: shlq $17, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: orq %rsi, %rax
; X64-NEXT: sete %bl
; X64-NEXT: callq use@PLT
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
%shl = shl i128 %a, 17
%cmp = icmp eq i128 %shl, 0
; Second user of %shl, in addition to the compare above.
call void @use(i128 %shl)
ret i1 %cmp
}
; Check that the optimization is applied to a DAG that has the appropriate
; shape even when that shape was written directly in IR rather than produced
; by expanding a wider shift.
; The shift amounts are consistent (17 + 47 == 64): %or.0 has the form of the
; upper half of the 128-bit value (%a:%b) shifted left by 17, and %shl.b has
; the form of its lower half.
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shldq $17, %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl.a = shl i64 %a, 17
%srl.b = lshr i64 %b, 47
%or.0 = or i64 %shl.a, %srl.b
%shl.b = shl i64 %b, 17
%or.1 = or i64 %or.0, %shl.b
%cmp = icmp eq i64 %or.1, 0
ret i1 %cmp
}
; Negative test: the optimization should not be applied because the constants
; used in the shifts do not match. %shl.a shifts by 17 and %srl.b by 47
; (17 + 47 == 64), but %shl.b shifts by 18 instead of 17.
define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $18, %eax, %ecx
; X86-NEXT: shll $18, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X64: # %bb.0:
; X64-NEXT: shldq $17, %rsi, %rdi
; X64-NEXT: shlq $18, %rsi
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl.a = shl i64 %a, 17
%srl.b = lshr i64 %b, 47
%or.0 = or i64 %shl.a, %srl.b
; Mismatched amount: 18 here versus 17 for %shl.a.
%shl.b = shl i64 %b, 18
%or.1 = or i64 %or.0, %shl.b
%cmp = icmp eq i64 %or.1, 0
ret i1 %cmp
}
; External function taking an i128; called by
; opt_setcc_shl_eq_zero_multiple_shl_users to give the shift result a second
; user besides the compare.
declare void @use(i128 %a)