Matt Arsenault 58a88001f3
PeepholeOpt: Fix looking for def of current copy to coalesce (#125533)
This fixes the handling of subregister extract copies. This
will allow AMDGPU to remove its implementation of
shouldRewriteCopySrc, which exists as a 10 year old workaround
to this bug. peephole-opt-fold-reg-sequence-subreg.mir will
show the expected improvement once the custom implementation
is removed.

The copy coalescing processing here is overly abstracted
from what's actually happening. Previously when visiting
coalescable copy-like instructions, we would parse the
sources one at a time and then pass the def of the root
instruction into findNextSource. This means that the
first thing the new ValueTracker constructed would do
is getVRegDef to find the instruction we are currently
processing. This adds an unnecessary step, placing
a useless entry in the RewriteMap, and required skipping
the no-op case where getNewSource would return the original
source operand. This was a problem since in the case
of a subregister extract, shouldRewriteCopySrc would always
say that it is useful to rewrite and the use-def chain walk
would abort, returning the original operand. Move the process
to start looking at the source operand to begin with.

This does not fix the confused handling in the uncoalescable
copy case which is proving to be more difficult. Some currently
handled cases have multiple defs from a single source, and other
handled cases have 0 input operands. It would be simpler if
this was implemented with isCopyLikeInstr, rather than guessing
at the operand structure as it does now.

There are some improvements and some regressions. The
regressions appear to be downstream issues for the most part. One
of the uglier regressions is in PPC, where a sequence of insert_subregs
is used to build registers. I opened #125502 to use reg_sequence instead,
which may help.

The worst regression is an absurd SPARC testcase using a <251 x fp128>,
which uses a very long chain of insert_subregs.

We need improved subregister handling locally in PeepholeOptimizer,
and other passes like MachineCSE to fix some of the other regressions.
We should handle subregister composes and folding more indexes
into insert_subreg and reg_sequence.
2025-02-05 23:29:02 +07:00

822 lines
22 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
; i8: two lshr by the same amount %y are or'ed together with %z mixed in
; (%logic = or %sh1, %z; %r = or %logic, %sh2).
; Expected asm: one orr of the two inputs, a mask to the low 8 bits, a single
; lsr, then the orr with the fourth argument.
define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: or_lshr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i8 %x0, %y
%sh2 = lshr i8 %x1, %y
%logic = or i8 %sh1, %z
%r = or i8 %logic, %sh2
ret i8 %r
}
; i32 variant with the inner operands swapped (%logic = or %z, %sh1).
; Expected asm: orr of the two inputs, a single lsr, then the orr with the
; fourth argument.
define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: or_lshr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i32 %x0, %y
%sh2 = lshr i32 %x1, %y
%logic = or i32 %z, %sh1
%r = or i32 %logic, %sh2
ret i32 %r
}
; <8 x i16> variant with the outer operands swapped (%r = or %sh2, %logic).
; Expected asm: one vector orr of the inputs, the shift amount negated and
; applied with a single ushl, then the orr with v3.
define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.8h, v2.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
%sh2 = lshr <8 x i16> %x1, %y
%logic = or <8 x i16> %sh1, %z
%r = or <8 x i16> %sh2, %logic
ret <8 x i16> %r
}
; <2 x i64> variant with both the inner and outer operands swapped
; (%logic = or %z, %sh1; %r = or %sh2, %logic).
; Expected asm: one vector orr of the inputs, neg of the shift amount, a single
; ushl, then the orr with v3.
define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.2d, v2.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
%sh2 = lshr <2 x i64> %x1, %y
%logic = or <2 x i64> %z, %sh1
%r = or <2 x i64> %sh2, %logic
ret <2 x i64> %r
}
; i16: two ashr by the same amount %y are or'ed together with %z mixed in
; (%logic = or %sh1, %z; %r = or %logic, %sh2).
; Expected asm: one orr of the two inputs, a sign extension from 16 bits
; (sxth), a single asr, then the orr with the fourth argument.
define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK-LABEL: or_ashr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: sxth w8, w8
; CHECK-NEXT: asr w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = ashr i16 %x0, %y
%sh2 = ashr i16 %x1, %y
%logic = or i16 %sh1, %z
%r = or i16 %logic, %sh2
ret i16 %r
}
; i64 variant with the inner operands swapped (%logic = or %z, %sh1).
; Expected asm: orr of the two inputs, a single asr, then the orr with the
; fourth argument.
define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_ashr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: asr x8, x8, x2
; CHECK-NEXT: orr x0, x8, x3
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
%logic = or i64 %z, %sh1
%r = or i64 %logic, %sh2
ret i64 %r
}
; <4 x i32> variant with the outer operands swapped (%r = or %sh2, %logic).
; Expected asm: one vector orr of the inputs, the shift amount negated and
; applied with a single sshl, then the orr with v3.
define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: or_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.4s, v2.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
%sh2 = ashr <4 x i32> %x1, %y
%logic = or <4 x i32> %sh1, %z
%r = or <4 x i32> %sh2, %logic
ret <4 x i32> %r
}
; <16 x i8> variant with both the inner and outer operands swapped
; (%logic = or %z, %sh1; %r = or %sh2, %logic).
; Expected asm: one vector orr of the inputs, neg of the shift amount, a single
; sshl, then the orr with v3.
define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: or_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.16b, v2.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
%sh2 = ashr <16 x i8> %x1, %y
%logic = or <16 x i8> %z, %sh1
%r = or <16 x i8> %sh2, %logic
ret <16 x i8> %r
}
; i32: two shl by the same amount %y are or'ed together with %z mixed in
; (%logic = or %sh1, %z; %r = or %logic, %sh2).
; Expected asm: orr of the two inputs, a single lsl, then the orr with the
; fourth argument.
define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: or_shl_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i32 %x0, %y
%sh2 = shl i32 %x1, %y
%logic = or i32 %sh1, %z
%r = or i32 %logic, %sh2
ret i32 %r
}
; i8 variant with the inner operands swapped (%logic = or %z, %sh1).
; Expected asm: orr of the two inputs, a single lsl, then the orr with the
; fourth argument; unlike the i8 lshr case, no masking instruction appears.
define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: or_shl_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i8 %x0, %y
%sh2 = shl i8 %x1, %y
%logic = or i8 %z, %sh1
%r = or i8 %logic, %sh2
ret i8 %r
}
; <2 x i64> variant with the outer operands swapped (%r = or %sh2, %logic).
; Expected asm: one vector orr of the inputs, a single ushl using the shift
; amount directly (no neg), then the orr with v3.
define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_shl_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <2 x i64> %x0, %y
%sh2 = shl <2 x i64> %x1, %y
%logic = or <2 x i64> %sh1, %z
%r = or <2 x i64> %sh2, %logic
ret <2 x i64> %r
}
; <8 x i16> variant with both the inner and outer operands swapped
; (%logic = or %z, %sh1; %r = or %sh2, %logic).
; Expected asm: one vector orr of the inputs, a single ushl using the shift
; amount directly, then the orr with v3.
define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_shl_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <8 x i16> %x0, %y
%sh2 = shl <8 x i16> %x1, %y
%logic = or <8 x i16> %z, %sh1
%r = or <8 x i16> %sh2, %logic
ret <8 x i16> %r
}
; negative test - mismatched shift opcodes
; %sh1 is an ashr while %sh2 is an lshr, so the expected asm keeps both shifts
; (asr and lsr) followed by two separate orr instructions.
define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_mix_shr:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, x2
; CHECK-NEXT: lsr x9, x1, x2
; CHECK-NEXT: orr x8, x8, x3
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = lshr i64 %x1, %y
%logic = or i64 %sh1, %z
%r = or i64 %logic, %sh2
ret i64 %r
}
; negative test - mismatched shift amounts
; %sh1 shifts by %y while %sh2 shifts by %w, so the expected asm keeps both lsr
; instructions (by x2 and x4) followed by two separate orr instructions.
define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
; CHECK-LABEL: or_lshr_mix_shift_amount:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, x2
; CHECK-NEXT: lsr x9, x1, x4
; CHECK-NEXT: orr x8, x8, x3
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: ret
%sh1 = lshr i64 %x0, %y
%sh2 = lshr i64 %x1, %w
%logic = or i64 %sh1, %z
%r = or i64 %logic, %sh2
ret i64 %r
}
; negative test - mismatched logic opcodes
; The inner logic op is xor while the outer one is or, so the expected asm
; keeps both lsr instructions followed by an eor and an orr.
define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: mix_logic_lshr:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, x2
; CHECK-NEXT: lsr x9, x1, x2
; CHECK-NEXT: eor x8, x8, x3
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: ret
%sh1 = lshr i64 %x0, %y
%sh2 = lshr i64 %x1, %y
%logic = xor i64 %sh1, %z
%r = or i64 %logic, %sh2
ret i64 %r
}
; i8: two lshr by the same amount %y are xor'ed together with %z mixed in
; (%logic = xor %sh1, %z; %r = xor %logic, %sh2).
; Expected asm: one eor of the two inputs, a mask to the low 8 bits, a single
; lsr, then the eor with the fourth argument.
define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: xor_lshr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: eor w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i8 %x0, %y
%sh2 = lshr i8 %x1, %y
%logic = xor i8 %sh1, %z
%r = xor i8 %logic, %sh2
ret i8 %r
}
; i32 variant with the inner operands swapped (%logic = xor %z, %sh1).
; Expected asm: eor of the two inputs, a single lsr, then the eor with the
; fourth argument.
define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: xor_lshr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: eor w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i32 %x0, %y
%sh2 = lshr i32 %x1, %y
%logic = xor i32 %z, %sh1
%r = xor i32 %logic, %sh2
ret i32 %r
}
; <8 x i16> variant with the outer operands swapped (%r = xor %sh2, %logic).
; Expected asm: one vector eor of the inputs, the shift amount negated and
; applied with a single ushl, then the eor with v3.
define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: xor_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.8h, v2.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
%sh2 = lshr <8 x i16> %x1, %y
%logic = xor <8 x i16> %sh1, %z
%r = xor <8 x i16> %sh2, %logic
ret <8 x i16> %r
}
; <2 x i64> variant with both the inner and outer operands swapped
; (%logic = xor %z, %sh1; %r = xor %sh2, %logic).
; Expected asm: one vector eor of the inputs, neg of the shift amount, a single
; ushl, then the eor with v3.
define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: xor_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.2d, v2.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
%sh2 = lshr <2 x i64> %x1, %y
%logic = xor <2 x i64> %z, %sh1
%r = xor <2 x i64> %sh2, %logic
ret <2 x i64> %r
}
; i16: two ashr by the same amount %y are xor'ed together with %z mixed in
; (%logic = xor %sh1, %z; %r = xor %logic, %sh2).
; Expected asm: one eor of the two inputs, a sign extension from 16 bits
; (sxth), a single asr, then the eor with the fourth argument.
define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK-LABEL: xor_ashr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1
; CHECK-NEXT: sxth w8, w8
; CHECK-NEXT: asr w8, w8, w2
; CHECK-NEXT: eor w0, w8, w3
; CHECK-NEXT: ret
%sh1 = ashr i16 %x0, %y
%sh2 = ashr i16 %x1, %y
%logic = xor i16 %sh1, %z
%r = xor i16 %logic, %sh2
ret i16 %r
}
; i64 variant with the inner operands swapped (%logic = xor %z, %sh1).
; Expected asm: eor of the two inputs, a single asr, then the eor with the
; fourth argument.
define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: xor_ashr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: eor x8, x0, x1
; CHECK-NEXT: asr x8, x8, x2
; CHECK-NEXT: eor x0, x8, x3
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
%logic = xor i64 %z, %sh1
%r = xor i64 %logic, %sh2
ret i64 %r
}
; <4 x i32> variant with the outer operands swapped (%r = xor %sh2, %logic).
; Expected asm: one vector eor of the inputs, the shift amount negated and
; applied with a single sshl, then the eor with v3.
define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: xor_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.4s, v2.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
%sh2 = ashr <4 x i32> %x1, %y
%logic = xor <4 x i32> %sh1, %z
%r = xor <4 x i32> %sh2, %logic
ret <4 x i32> %r
}
; <16 x i8> variant with both the inner and outer operands swapped
; (%logic = xor %z, %sh1; %r = xor %sh2, %logic).
; Expected asm: one vector eor of the inputs, neg of the shift amount, a single
; sshl, then the eor with v3.
define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: xor_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.16b, v2.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
%sh2 = ashr <16 x i8> %x1, %y
%logic = xor <16 x i8> %z, %sh1
%r = xor <16 x i8> %sh2, %logic
ret <16 x i8> %r
}
; i32: two shl by the same amount %y are xor'ed together with %z mixed in
; (%logic = xor %sh1, %z; %r = xor %logic, %sh2).
; Expected asm: eor of the two inputs, a single lsl, then the eor with the
; fourth argument.
define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: xor_shl_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: eor w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i32 %x0, %y
%sh2 = shl i32 %x1, %y
%logic = xor i32 %sh1, %z
%r = xor i32 %logic, %sh2
ret i32 %r
}
; i8 variant with the inner operands swapped (%logic = xor %z, %sh1).
; Expected asm: eor of the two inputs, a single lsl, then the eor with the
; fourth argument; unlike the i8 lshr case, no masking instruction appears.
define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: xor_shl_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: eor w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i8 %x0, %y
%sh2 = shl i8 %x1, %y
%logic = xor i8 %z, %sh1
%r = xor i8 %logic, %sh2
ret i8 %r
}
; <2 x i64> variant with the outer operands swapped (%r = xor %sh2, %logic).
; Expected asm: one vector eor of the inputs, a single ushl using the shift
; amount directly (no neg), then the eor with v3.
define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: xor_shl_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <2 x i64> %x0, %y
%sh2 = shl <2 x i64> %x1, %y
%logic = xor <2 x i64> %sh1, %z
%r = xor <2 x i64> %sh2, %logic
ret <2 x i64> %r
}
; <8 x i16> variant with both the inner and outer operands swapped
; (%logic = xor %z, %sh1; %r = xor %sh2, %logic).
; Expected asm: one vector eor of the inputs, a single ushl using the shift
; amount directly, then the eor with v3.
define <8 x i16> @xor_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: xor_shl_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <8 x i16> %x0, %y
%sh2 = shl <8 x i16> %x1, %y
%logic = xor <8 x i16> %z, %sh1
%r = xor <8 x i16> %sh2, %logic
ret <8 x i16> %r
}
; negative test - mismatched shift opcodes
; %sh1 is an ashr while %sh2 is an lshr, so the expected asm keeps both shifts
; (asr and lsr) followed by two separate eor instructions.
define i64 @xor_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: xor_mix_shr:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, x2
; CHECK-NEXT: lsr x9, x1, x2
; CHECK-NEXT: eor x8, x8, x3
; CHECK-NEXT: eor x0, x8, x9
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = lshr i64 %x1, %y
%logic = xor i64 %sh1, %z
%r = xor i64 %logic, %sh2
ret i64 %r
}
; negative test - mismatched shift amounts
; %sh1 shifts by %y while %sh2 shifts by %w, so the expected asm keeps both lsr
; instructions (by x2 and x4) followed by two separate eor instructions.
define i64 @xor_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
; CHECK-LABEL: xor_lshr_mix_shift_amount:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, x2
; CHECK-NEXT: lsr x9, x1, x4
; CHECK-NEXT: eor x8, x8, x3
; CHECK-NEXT: eor x0, x8, x9
; CHECK-NEXT: ret
%sh1 = lshr i64 %x0, %y
%sh2 = lshr i64 %x1, %w
%logic = xor i64 %sh1, %z
%r = xor i64 %logic, %sh2
ret i64 %r
}
; negative test - mismatched logic opcodes
; The inner logic op is or while the outer one is xor, so the expected asm
; keeps both asr instructions followed by an orr and an eor.
define i64 @mix_logic_ashr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: mix_logic_ashr:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, x2
; CHECK-NEXT: asr x9, x1, x2
; CHECK-NEXT: orr x8, x8, x3
; CHECK-NEXT: eor x0, x8, x9
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
%logic = or i64 %sh1, %z
%r = xor i64 %logic, %sh2
ret i64 %r
}
; i8: two lshr by the same amount %y are and'ed together with %z mixed in
; (%logic = and %sh1, %z; %r = and %logic, %sh2).
; Expected asm: one and of the two inputs, a mask to the low 8 bits, a single
; lsr, then the and with the fourth argument.
define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: and_lshr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: and w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i8 %x0, %y
%sh2 = lshr i8 %x1, %y
%logic = and i8 %sh1, %z
%r = and i8 %logic, %sh2
ret i8 %r
}
; i32 variant with the inner operands swapped (%logic = and %z, %sh1).
; Expected asm: and of the two inputs, a single lsr, then the and with the
; fourth argument.
define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: and_lshr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: and w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i32 %x0, %y
%sh2 = lshr i32 %x1, %y
%logic = and i32 %z, %sh1
%r = and i32 %logic, %sh2
ret i32 %r
}
; <8 x i16> variant with the outer operands swapped (%r = and %sh2, %logic).
; Expected asm: one vector and of the inputs, the shift amount negated and
; applied with a single ushl, then the and with v3.
define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: and_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.8h, v2.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
%sh2 = lshr <8 x i16> %x1, %y
%logic = and <8 x i16> %sh1, %z
%r = and <8 x i16> %sh2, %logic
ret <8 x i16> %r
}
; <2 x i64> variant with both the inner and outer operands swapped
; (%logic = and %z, %sh1; %r = and %sh2, %logic).
; Expected asm: one vector and of the inputs, neg of the shift amount, a single
; ushl, then the and with v3.
define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: and_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.2d, v2.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
%sh2 = lshr <2 x i64> %x1, %y
%logic = and <2 x i64> %z, %sh1
%r = and <2 x i64> %sh2, %logic
ret <2 x i64> %r
}
; i16: two ashr by the same amount %y are and'ed together with %z mixed in
; (%logic = and %sh1, %z; %r = and %logic, %sh2).
; Expected asm: one and of the two inputs, a sign extension from 16 bits
; (sxth), a single asr, then the and with the fourth argument.
define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK-LABEL: and_ashr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1
; CHECK-NEXT: sxth w8, w8
; CHECK-NEXT: asr w8, w8, w2
; CHECK-NEXT: and w0, w8, w3
; CHECK-NEXT: ret
%sh1 = ashr i16 %x0, %y
%sh2 = ashr i16 %x1, %y
%logic = and i16 %sh1, %z
%r = and i16 %logic, %sh2
ret i16 %r
}
; i64 variant with the inner operands swapped (%logic = and %z, %sh1).
; Expected asm: and of the two inputs, a single asr, then the and with the
; fourth argument.
define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: and_ashr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: and x8, x0, x1
; CHECK-NEXT: asr x8, x8, x2
; CHECK-NEXT: and x0, x8, x3
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
%logic = and i64 %z, %sh1
%r = and i64 %logic, %sh2
ret i64 %r
}
; <4 x i32> variant with the outer operands swapped (%r = and %sh2, %logic).
; Expected asm: one vector and of the inputs, the shift amount negated and
; applied with a single sshl, then the and with v3.
define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: and_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.4s, v2.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
%sh2 = ashr <4 x i32> %x1, %y
%logic = and <4 x i32> %sh1, %z
%r = and <4 x i32> %sh2, %logic
ret <4 x i32> %r
}
; <16 x i8> variant with both the inner and outer operands swapped
; (%logic = and %z, %sh1; %r = and %sh2, %logic).
; Expected asm: one vector and of the inputs, neg of the shift amount, a single
; sshl, then the and with v3.
define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: and_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: neg v1.16b, v2.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
%sh2 = ashr <16 x i8> %x1, %y
%logic = and <16 x i8> %z, %sh1
%r = and <16 x i8> %sh2, %logic
ret <16 x i8> %r
}
; i32: two shl by the same amount %y are and'ed together with %z mixed in
; (%logic = and %sh1, %z; %r = and %logic, %sh2).
; Expected asm: and of the two inputs, a single lsl, then the and with the
; fourth argument.
define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: and_shl_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: and w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i32 %x0, %y
%sh2 = shl i32 %x1, %y
%logic = and i32 %sh1, %z
%r = and i32 %logic, %sh2
ret i32 %r
}
; i8 variant with the inner operands swapped (%logic = and %z, %sh1).
; Expected asm: and of the two inputs, a single lsl, then the and with the
; fourth argument; unlike the i8 lshr case, no masking instruction appears.
define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: and_shl_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: and w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i8 %x0, %y
%sh2 = shl i8 %x1, %y
%logic = and i8 %z, %sh1
%r = and i8 %logic, %sh2
ret i8 %r
}
; <2 x i64> variant with the outer operands swapped (%r = and %sh2, %logic).
; Expected asm: one vector and of the inputs, a single ushl using the shift
; amount directly (no neg), then the and with v3.
define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: and_shl_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <2 x i64> %x0, %y
%sh2 = shl <2 x i64> %x1, %y
%logic = and <2 x i64> %sh1, %z
%r = and <2 x i64> %sh2, %logic
ret <2 x i64> %r
}
; <8 x i16> variant with both the inner and outer operands swapped
; (%logic = and %z, %sh1; %r = and %sh2, %logic).
; Expected asm: one vector and of the inputs, a single ushl using the shift
; amount directly, then the and with v3.
define <8 x i16> @and_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: and_shl_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <8 x i16> %x0, %y
%sh2 = shl <8 x i16> %x1, %y
%logic = and <8 x i16> %z, %sh1
%r = and <8 x i16> %sh2, %logic
ret <8 x i16> %r
}
; negative test - mismatched shift opcodes
; %sh1 is an lshr while %sh2 is an ashr, so the expected asm keeps both shifts
; (lsr and asr) followed by two separate and instructions.
define i64 @and_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: and_mix_shr:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, x2
; CHECK-NEXT: asr x9, x1, x2
; CHECK-NEXT: and x8, x8, x3
; CHECK-NEXT: and x0, x8, x9
; CHECK-NEXT: ret
%sh1 = lshr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
%logic = and i64 %sh1, %z
%r = and i64 %logic, %sh2
ret i64 %r
}
; negative test - mismatched shift amounts
; %sh1 shifts by %y while %sh2 shifts by %w, so the expected asm keeps both lsr
; instructions (by x2 and x4) followed by two separate and instructions.
define i64 @and_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
; CHECK-LABEL: and_lshr_mix_shift_amount:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, x2
; CHECK-NEXT: lsr x9, x1, x4
; CHECK-NEXT: and x8, x8, x3
; CHECK-NEXT: and x0, x8, x9
; CHECK-NEXT: ret
%sh1 = lshr i64 %x0, %y
%sh2 = lshr i64 %x1, %w
%logic = and i64 %sh1, %z
%r = and i64 %logic, %sh2
ret i64 %r
}
; negative test - mismatched logic opcodes
; The inner logic op is xor while the outer one is and, so the expected asm
; keeps both lsl instructions followed by an eor and an and.
define i64 @mix_logic_shl(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: mix_logic_shl:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl x8, x0, x2
; CHECK-NEXT: lsl x9, x1, x2
; CHECK-NEXT: eor x8, x8, x3
; CHECK-NEXT: and x0, x8, x9
; CHECK-NEXT: ret
%sh1 = shl i64 %x0, %y
%sh2 = shl i64 %x1, %y
%logic = xor i64 %sh1, %z
%r = and i64 %logic, %sh2
ret i64 %r
}
; (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
; i32: (shl (x | y), 5) | (lshr x, 27); the shift amounts sum to 32.
; Expected asm: one orr followed by a single extr with immediate #27.
define i32 @or_fshl_commute0(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshl_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: extr w0, w8, w0, #27
; CHECK-NEXT: ret
%or1 = or i32 %x, %y
%sh1 = shl i32 %or1, 5
%sh2 = lshr i32 %x, 27
%r = or i32 %sh1, %sh2
ret i32 %r
}
; i64 variant with the inner or commuted (or %y, %x); shifts are 35 and 29,
; which sum to 64.
; Expected asm: the orr is emitted on the 32-bit w registers, then a single
; 64-bit extr with immediate #29.
; NOTE(review): the 32-bit orr is presumably sufficient because the shl by 35
; discards every bit of the or above bit 28 - confirm if this output changes.
define i64 @or_fshl_commute1(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshl_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w1, w0
; CHECK-NEXT: extr x0, x8, x0, #29
; CHECK-NEXT: ret
%or1 = or i64 %y, %x
%sh1 = shl i64 %or1, 35
%sh2 = lshr i64 %x, 29
%r = or i64 %sh1, %sh2
ret i64 %r
}
; i16 variant with the outer or commuted (or %sh2, %sh1); shifts are 2 and 14,
; which sum to 16.
; Expected asm: no extr here; instead orr, lsl #2, a bfxil inserting 2 bits
; of w0 starting at bit 14, and a final mov into w0.
define i16 @or_fshl_commute2(i16 %x, i16 %y) {
; CHECK-LABEL: or_fshl_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsl w8, w8, #2
; CHECK-NEXT: bfxil w8, w0, #14, #2
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%or1 = or i16 %x, %y
%sh1 = shl i16 %or1, 2
%sh2 = lshr i16 %x, 14
%r = or i16 %sh2, %sh1
ret i16 %r
}
; i8 variant with both the inner and outer or commuted; shifts are 5 and 3,
; which sum to 8.
; Expected asm: no extr here; instead orr, lsl #5, a bfxil inserting 5 bits
; of w0 starting at bit 3, and a final mov into w0.
define i8 @or_fshl_commute3(i8 %x, i8 %y) {
; CHECK-LABEL: or_fshl_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w1, w0
; CHECK-NEXT: lsl w8, w8, #5
; CHECK-NEXT: bfxil w8, w0, #3, #5
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%or1 = or i8 %y, %x
%sh1 = shl i8 %or1, 5
%sh2 = lshr i8 %x, 3
%r = or i8 %sh2, %sh1
ret i8 %r
}
; negative test - the shift amounts (20 and 11) sum to 31, not the i32 width,
; so the expected asm has no extr: orr, lsl #20, then an orr with the
; lsr #11 folded in as a shifted operand.
define i32 @or_fshl_wrong_shift(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshl_wrong_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsl w8, w8, #20
; CHECK-NEXT: orr w0, w8, w0, lsr #11
; CHECK-NEXT: ret
%or1 = or i32 %x, %y
%sh1 = shl i32 %or1, 20
%sh2 = lshr i32 %x, 11
%r = or i32 %sh1, %sh2
ret i32 %r
}
; (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
; i64: (shl x, 40) | (lshr (x | y), 24); the shift amounts sum to 64.
; Expected asm: one orr followed by a single extr with immediate #24.
define i64 @or_fshr_commute0(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x0, x0, x8, #24
; CHECK-NEXT: ret
%or1 = or i64 %x, %y
%sh1 = shl i64 %x, 40
%sh2 = lshr i64 %or1, 24
%r = or i64 %sh1, %sh2
ret i64 %r
}
; i32 variant with the inner or commuted (or %y, %x); shifts are 3 and 29,
; which sum to 32.
; Expected asm: one orr followed by a single extr with immediate #29.
define i32 @or_fshr_commute1(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w1, w0
; CHECK-NEXT: extr w0, w0, w8, #29
; CHECK-NEXT: ret
%or1 = or i32 %y, %x
%sh1 = shl i32 %x, 3
%sh2 = lshr i32 %or1, 29
%r = or i32 %sh1, %sh2
ret i32 %r
}
; i16 variant with the outer or commuted (or %sh2, %sh1); shifts are 9 and 7,
; which sum to 16.
; Expected asm: no extr here; instead lsl #9 of w0, an orr of the inputs, a
; bfxil inserting 9 bits of the orr result starting at bit 7, and a final mov.
define i16 @or_fshr_commute2(i16 %x, i16 %y) {
; CHECK-LABEL: or_fshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl w8, w0, #9
; CHECK-NEXT: orr w9, w0, w1
; CHECK-NEXT: bfxil w8, w9, #7, #9
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%or1 = or i16 %x, %y
%sh1 = shl i16 %x, 9
%sh2 = lshr i16 %or1, 7
%r = or i16 %sh2, %sh1
ret i16 %r
}
; i8 variant with both the inner and outer or commuted; shifts are 2 and 6,
; which sum to 8.
; Expected asm: no extr here; instead lsl #2 of w0, an orr of the inputs, a
; bfxil inserting 2 bits of the orr result starting at bit 6, and a final mov.
define i8 @or_fshr_commute3(i8 %x, i8 %y) {
; CHECK-LABEL: or_fshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl w8, w0, #2
; CHECK-NEXT: orr w9, w1, w0
; CHECK-NEXT: bfxil w8, w9, #6, #2
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%or1 = or i8 %y, %x
%sh1 = shl i8 %x, 2
%sh2 = lshr i8 %or1, 6
%r = or i8 %sh2, %sh1
ret i8 %r
}
; negative test - the shift amounts (7 and 26) sum to 33, not the i32 width,
; so the expected asm has no extr: orr, lsr #26, then an orr with the
; lsl #7 folded in as a shifted operand.
define i32 @or_fshr_wrong_shift(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshr_wrong_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsr w8, w8, #26
; CHECK-NEXT: orr w0, w8, w0, lsl #7
; CHECK-NEXT: ret
%or1 = or i32 %x, %y
%sh1 = shl i32 %x, 7
%sh2 = lshr i32 %or1, 26
%r = or i32 %sh1, %sh2
ret i32 %r
}