llvm-project/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll
Filipp Zhinkin c55899f763 [DAGCombiner] Hoist funnel shifts from logic operation
Hoist funnel shift from logic op:
logic_op (FSH x0, x1, s), (FSH y0, y1, s) --> FSH (logic_op x0, y0), (logic_op x1, y1), s

The transformation improves the code generated for some of the cases related to
issue https://github.com/llvm/llvm-project/issues/49541.

Reducing the number of funnel shifts can also improve throughput on x86 CPUs by
making use of more of the available execution ports: https://quick-bench.com/q/gC7AKkJJsDZzRrs_JWDzm9t_iDM
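
As an informal illustration (not part of the commit), the identity behind the fold can be
sanity-checked with a small C program; fshl64 below is a hypothetical reference
implementation of llvm.fshl.i64, and the asserts mirror the or/and/xor cases proved in the
Alive2 links below:

#include <assert.h>
#include <stdint.h>

/* Hypothetical reference implementation of llvm.fshl.i64: shift the
   128-bit concatenation a:b left by s mod 64 and keep the high half. */
static uint64_t fshl64(uint64_t a, uint64_t b, uint64_t s) {
    unsigned amt = (unsigned)(s & 63);
    if (amt == 0)
        return a; /* shifting a 64-bit value by 64 is undefined in C */
    return (a << amt) | (b >> (64 - amt));
}

int main(void) {
    uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
    uint64_t c = 0x00ff00ff00ff00ffULL, d = 0xf0f0f0f0f0f0f0f0ULL;
    for (uint64_t s = 0; s < 64; ++s) {
        /* or (fshl a, b, s), (fshl c, d, s) == fshl (a|c), (b|d), s */
        assert((fshl64(a, b, s) | fshl64(c, d, s)) == fshl64(a | c, b | d, s));
        /* the two shifted halves occupy disjoint bit positions, so the
           same identity holds for 'and' and 'xor' */
        assert((fshl64(a, b, s) & fshl64(c, d, s)) == fshl64(a & c, b & d, s));
        assert((fshl64(a, b, s) ^ fshl64(c, d, s)) == fshl64(a ^ c, b ^ d, s));
    }
    return 0;
}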

Transformation correctness checks:
https://alive2.llvm.org/ce/z/TKPULH
https://alive2.llvm.org/ce/z/UvTd_9
https://alive2.llvm.org/ce/z/j8qW3_
https://alive2.llvm.org/ce/z/7Wq7gE
https://alive2.llvm.org/ce/z/Xr5w8R
https://alive2.llvm.org/ce/z/D5xe_E
https://alive2.llvm.org/ce/z/2yBZiy

Differential Revision: https://reviews.llvm.org/D130994
2022-08-05 17:02:22 -04:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone
declare i64 @llvm.fshr.i64(i64, i64, i64) nounwind readnone
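
; Both funnel shifts use the same variable shift amount, so the logic op is
; hoisted and a single funnel shift remains.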
define i64 @hoist_fshl_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
; X64-LABEL: hoist_fshl_from_or:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    orq %rcx, %rsi
; X64-NEXT:    orq %rdx, %rax
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shldq %cl, %rsi, %rax
; X64-NEXT:    retq
  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
  %res = or i64 %fshl.0, %fshl.1
  ret i64 %res
}

define i64 @hoist_fshl_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
; X64-LABEL: hoist_fshl_from_and:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    andq %rcx, %rsi
; X64-NEXT:    andq %rdx, %rax
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shldq %cl, %rsi, %rax
; X64-NEXT:    retq
  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
  %res = and i64 %fshl.0, %fshl.1
  ret i64 %res
}

define i64 @hoist_fshl_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
; X64-LABEL: hoist_fshl_from_xor:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    xorq %rcx, %rsi
; X64-NEXT:    xorq %rdx, %rax
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shldq %cl, %rsi, %rax
; X64-NEXT:    retq
  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
  %res = xor i64 %fshl.0, %fshl.1
  ret i64 %res
}
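
; Negative test: the shift amounts differ, so the funnel shifts are not hoisted
; and two shifts remain.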
define i64 @fshl_or_with_different_shift_value(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
; X64-LABEL: fshl_or_with_different_shift_value:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    shldq $12, %rsi, %rdi
; X64-NEXT:    shldq $13, %rcx, %rax
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    retq
  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 12)
  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 13)
  %res = or i64 %fshl.0, %fshl.1
  ret i64 %res
}
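
; The fold also applies when both funnel shifts use the same constant shift amount.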
define i64 @hoist_fshl_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
; X64-LABEL: hoist_fshl_from_or_const_shift:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    orq %rcx, %rsi
; X64-NEXT:    orq %rdx, %rax
; X64-NEXT:    shldq $15, %rsi, %rax
; X64-NEXT:    retq
  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 15)
  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 15)
  %res = or i64 %fshl.0, %fshl.1
  ret i64 %res
}
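
; The same hoisting applies to fshr when both shifts use a common variable amount.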
define i64 @hoist_fshr_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
; X64-LABEL: hoist_fshr_from_or:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    orq %rdx, %rdi
; X64-NEXT:    orq %rcx, %rax
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shrdq %cl, %rdi, %rax
; X64-NEXT:    retq
  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
  %res = or i64 %fshr.0, %fshr.1
  ret i64 %res
}

define i64 @hoist_fshr_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
; X64-LABEL: hoist_fshr_from_and:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    andq %rdx, %rdi
; X64-NEXT:    andq %rcx, %rax
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shrdq %cl, %rdi, %rax
; X64-NEXT:    retq
  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
  %res = and i64 %fshr.0, %fshr.1
  ret i64 %res
}

define i64 @hoist_fshr_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
; X64-LABEL: hoist_fshr_from_xor:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    xorq %rdx, %rdi
; X64-NEXT:    xorq %rcx, %rax
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shrdq %cl, %rdi, %rax
; X64-NEXT:    retq
  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
  %res = xor i64 %fshr.0, %fshr.1
  ret i64 %res
}
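
; Negative test: different shift amounts prevent the fold for fshr as well.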
define i64 @fshr_or_with_different_shift_value(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
; X64-LABEL: fshr_or_with_different_shift_value:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    shldq $52, %rsi, %rdi
; X64-NEXT:    shldq $51, %rcx, %rax
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    retq
  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 12)
  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 13)
  %res = or i64 %fshr.0, %fshr.1
  ret i64 %res
}
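
; fshr variant of the matching-constant-shift case.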
define i64 @hoist_fshr_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
; X64-LABEL: hoist_fshr_from_or_const_shift:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    orq %rcx, %rsi
; X64-NEXT:    orl %edx, %eax
; X64-NEXT:    shldq $49, %rsi, %rax
; X64-NEXT:    retq
  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 15)
  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 15)
  %res = or i64 %fshr.0, %fshr.1
  ret i64 %res
}