
This fixes the handling of subregister extract copies. This will allow AMDGPU to remove its implementation of shouldRewriteCopySrc, which exists as a 10-year-old workaround for this bug. peephole-opt-fold-reg-sequence-subreg.mir will show the expected improvement once the custom implementation is removed.

The copy coalescing processing here is overly abstracted from what is actually happening. Previously, when visiting coalescable copy-like instructions, we would parse the sources one at a time and then pass the def of the root instruction into findNextSource. This meant the first thing a newly constructed ValueTracker would do is call getVRegDef to find the instruction we were already processing. This added an unnecessary step, placed a useless entry in the RewriteMap, and required skipping the no-op case where getNewSource would return the original source operand. It was a problem because, for a subregister extract, shouldRewriteCopySrc would always report the rewrite as useful, so the use-def chain walk would abort and return the original operand. Instead, start the walk directly from the source operand.

This does not fix the confused handling in the uncoalescable copy case, which is proving more difficult. Some currently handled cases have multiple defs from a single source, and other handled cases have 0 input operands. It would be simpler if this were implemented with isCopyLikeInstr, rather than guessing at the operand structure as it does now.

There are some improvements and some regressions. The regressions appear to be mostly downstream issues. One of the uglier regressions is in PPC, where a sequence of insert_subregs is used to build registers. I opened #125502 to use reg_sequence instead, which may help. The worst regression is an absurd SPARC testcase using a <251 x fp128>, which builds the value with a very long chain of insert_subregs.

We need improved subregister handling locally in PeepholeOptimizer, and in other passes like MachineCSE, to fix some of the other regressions. We should handle subregister composes and fold more indexes into insert_subreg and reg_sequence.
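As an illustration of the pattern in question, here is a minimal sketch; the register classes and virtual register numbers below are AMDGPU-flavored assumptions for exposition, not lines taken from the patch or its tests:

```mir
; Hypothetical MIR: %2 is assembled from two 32-bit pieces, then one piece
; is read back through a subregister extract copy.
%2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, %1:vgpr_32, %subreg.sub1
%3:vgpr_32 = COPY %2.sub1

; What coalescing should produce: the COPY reads the piece directly.
%3:vgpr_32 = COPY %1
```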
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-SD-LABEL: fshl_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr w8, w1, #1
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: lsl w10, w0, w2
; CHECK-SD-NEXT: lsr w8, w8, w9
; CHECK-SD-NEXT: orr w0, w10, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshl_i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: lsr w9, w1, #1
; CHECK-GI-NEXT: and w10, w2, #0x1f
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsl w10, w0, w10
; CHECK-GI-NEXT: lsr w8, w9, w8
; CHECK-GI-NEXT: orr w0, w10, w8
; CHECK-GI-NEXT: ret
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
ret i32 %f
}

define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-SD-LABEL: fshl_i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr x8, x1, #1
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: lsl x10, x0, x2
; CHECK-SD-NEXT: lsr x8, x8, x9
; CHECK-SD-NEXT: orr x0, x10, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshl_i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #63 // =0x3f
; CHECK-GI-NEXT: lsr x9, x1, #1
; CHECK-GI-NEXT: and x10, x2, #0x3f
; CHECK-GI-NEXT: bic x8, x8, x2
; CHECK-GI-NEXT: lsl x10, x0, x10
; CHECK-GI-NEXT: lsr x8, x9, x8
; CHECK-GI-NEXT: orr x0, x10, x8
; CHECK-GI-NEXT: ret
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
ret i64 %f
}

define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK-SD-LABEL: fshl_i128:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: tst x4, #0x40
; CHECK-SD-NEXT: mvn w11, w4
; CHECK-SD-NEXT: csel x8, x3, x0, ne
; CHECK-SD-NEXT: csel x9, x2, x3, ne
; CHECK-SD-NEXT: csel x12, x0, x1, ne
; CHECK-SD-NEXT: lsr x9, x9, #1
; CHECK-SD-NEXT: lsr x10, x8, #1
; CHECK-SD-NEXT: lsl x8, x8, x4
; CHECK-SD-NEXT: lsl x12, x12, x4
; CHECK-SD-NEXT: lsr x9, x9, x11
; CHECK-SD-NEXT: lsr x10, x10, x11
; CHECK-SD-NEXT: orr x0, x8, x9
; CHECK-SD-NEXT: orr x1, x12, x10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshl_i128:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: and x9, x4, #0x7f
; CHECK-GI-NEXT: mov w10, #64 // =0x40
; CHECK-GI-NEXT: lsl x14, x3, #63
; CHECK-GI-NEXT: sub x12, x10, x9
; CHECK-GI-NEXT: lsl x13, x1, x9
; CHECK-GI-NEXT: mov w8, #127 // =0x7f
; CHECK-GI-NEXT: lsr x12, x0, x12
; CHECK-GI-NEXT: bic x8, x8, x4
; CHECK-GI-NEXT: sub x15, x9, #64
; CHECK-GI-NEXT: cmp x9, #64
; CHECK-GI-NEXT: lsl x9, x0, x9
; CHECK-GI-NEXT: lsl x15, x0, x15
; CHECK-GI-NEXT: orr x12, x12, x13
; CHECK-GI-NEXT: orr x13, x14, x2, lsr #1
; CHECK-GI-NEXT: lsr x14, x3, #1
; CHECK-GI-NEXT: sub x10, x10, x8
; CHECK-GI-NEXT: sub x16, x8, #64
; CHECK-GI-NEXT: csel x9, x9, xzr, lo
; CHECK-GI-NEXT: lsr x17, x13, x8
; CHECK-GI-NEXT: lsl x10, x14, x10
; CHECK-GI-NEXT: csel x12, x12, x15, lo
; CHECK-GI-NEXT: tst x4, #0x7f
; CHECK-GI-NEXT: lsr x15, x14, x16
; CHECK-GI-NEXT: mvn x11, x4
; CHECK-GI-NEXT: csel x12, x1, x12, eq
; CHECK-GI-NEXT: orr x10, x17, x10
; CHECK-GI-NEXT: cmp x8, #64
; CHECK-GI-NEXT: lsr x14, x14, x8
; CHECK-GI-NEXT: csel x10, x10, x15, lo
; CHECK-GI-NEXT: tst x11, #0x7f
; CHECK-GI-NEXT: csel x10, x13, x10, eq
; CHECK-GI-NEXT: cmp x8, #64
; CHECK-GI-NEXT: csel x8, x14, xzr, lo
; CHECK-GI-NEXT: orr x0, x9, x10
; CHECK-GI-NEXT: orr x1, x12, x8
; CHECK-GI-NEXT: ret
%f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
ret i128 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-SD-LABEL: fshl_i37:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x9, #46053 // =0xb3e5
; CHECK-SD-NEXT: and x8, x2, #0x1fffffffff
; CHECK-SD-NEXT: movk x9, #12398, lsl #16
; CHECK-SD-NEXT: movk x9, #15941, lsl #32
; CHECK-SD-NEXT: movk x9, #1771, lsl #48
; CHECK-SD-NEXT: umulh x8, x8, x9
; CHECK-SD-NEXT: mov w9, #37 // =0x25
; CHECK-SD-NEXT: msub w8, w8, w9, w2
; CHECK-SD-NEXT: ubfiz x9, x1, #26, #37
; CHECK-SD-NEXT: mvn w10, w8
; CHECK-SD-NEXT: lsl x8, x0, x8
; CHECK-SD-NEXT: lsr x9, x9, x10
; CHECK-SD-NEXT: orr x0, x8, x9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshl_i37:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #37 // =0x25
; CHECK-GI-NEXT: and x9, x2, #0x1fffffffff
; CHECK-GI-NEXT: udiv x10, x9, x8
; CHECK-GI-NEXT: msub x8, x10, x8, x9
; CHECK-GI-NEXT: mov w9, #36 // =0x24
; CHECK-GI-NEXT: ubfx x10, x1, #1, #36
; CHECK-GI-NEXT: sub x9, x9, x8
; CHECK-GI-NEXT: and x8, x8, #0x1fffffffff
; CHECK-GI-NEXT: and x9, x9, #0x1fffffffff
; CHECK-GI-NEXT: lsl x8, x0, x8
; CHECK-GI-NEXT: lsr x9, x10, x9
; CHECK-GI-NEXT: orr x0, x8, x9
; CHECK-GI-NEXT: ret
%f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #67 // =0x43
; CHECK-NEXT: ret
%f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
ret i7 %f
}

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-SD-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w0, #128 // =0x80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w0, #-128 // =0xffffff80
; CHECK-GI-NEXT: ret
%f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #120 // =0x78
; CHECK-NEXT: ret
%f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
%f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #23
; CHECK-NEXT: ret
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #23
; CHECK-NEXT: ret
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x0, x1, #23
; CHECK-NEXT: ret
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-SD-LABEL: fshl_i8_const_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w0, #128 // =0x80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshl_i8_const_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w0, #-128 // =0xffffff80
; CHECK-GI-NEXT: ret
%f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-SD-LABEL: fshr_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsl w8, w0, #1
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: lsr w10, w1, w2
; CHECK-SD-NEXT: lsl w8, w8, w9
; CHECK-SD-NEXT: orr w0, w8, w10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshr_i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: lsl w9, w0, #1
; CHECK-GI-NEXT: and w10, w2, #0x1f
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsl w8, w9, w8
; CHECK-GI-NEXT: lsr w9, w1, w10
; CHECK-GI-NEXT: orr w0, w8, w9
; CHECK-GI-NEXT: ret
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
ret i32 %f
}

define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-SD-LABEL: fshr_i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsl x8, x0, #1
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: lsr x10, x1, x2
; CHECK-SD-NEXT: lsl x8, x8, x9
; CHECK-SD-NEXT: orr x0, x8, x10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshr_i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #63 // =0x3f
; CHECK-GI-NEXT: lsl x9, x0, #1
; CHECK-GI-NEXT: and x10, x2, #0x3f
; CHECK-GI-NEXT: bic x8, x8, x2
; CHECK-GI-NEXT: lsl x8, x9, x8
; CHECK-GI-NEXT: lsr x9, x1, x10
; CHECK-GI-NEXT: orr x0, x8, x9
; CHECK-GI-NEXT: ret
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-SD-LABEL: fshr_i37:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x9, #46053 // =0xb3e5
; CHECK-SD-NEXT: and x8, x2, #0x1fffffffff
; CHECK-SD-NEXT: lsl x10, x0, #1
; CHECK-SD-NEXT: movk x9, #12398, lsl #16
; CHECK-SD-NEXT: movk x9, #15941, lsl #32
; CHECK-SD-NEXT: movk x9, #1771, lsl #48
; CHECK-SD-NEXT: umulh x8, x8, x9
; CHECK-SD-NEXT: mov w9, #37 // =0x25
; CHECK-SD-NEXT: msub w8, w8, w9, w2
; CHECK-SD-NEXT: lsl x9, x1, #27
; CHECK-SD-NEXT: add w8, w8, #27
; CHECK-SD-NEXT: mvn w11, w8
; CHECK-SD-NEXT: lsr x8, x9, x8
; CHECK-SD-NEXT: lsl x9, x10, x11
; CHECK-SD-NEXT: orr x0, x9, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshr_i37:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #37 // =0x25
; CHECK-GI-NEXT: and x9, x2, #0x1fffffffff
; CHECK-GI-NEXT: and x11, x1, #0x1fffffffff
; CHECK-GI-NEXT: udiv x10, x9, x8
; CHECK-GI-NEXT: msub x8, x10, x8, x9
; CHECK-GI-NEXT: mov w9, #36 // =0x24
; CHECK-GI-NEXT: lsl x10, x0, #1
; CHECK-GI-NEXT: sub x9, x9, x8
; CHECK-GI-NEXT: and x8, x8, #0x1fffffffff
; CHECK-GI-NEXT: and x9, x9, #0x1fffffffff
; CHECK-GI-NEXT: lsr x8, x11, x8
; CHECK-GI-NEXT: lsl x9, x10, x9
; CHECK-GI-NEXT: orr x0, x9, x8
; CHECK-GI-NEXT: ret
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #31 // =0x1f
; CHECK-NEXT: ret
%f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
ret i7 %f
}

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w0, #254 // =0xfe
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w0, #-2 // =0xfffffffe
; CHECK-GI-NEXT: ret
%f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w0, #225 // =0xe1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w0, #481 // =0x1e1
; CHECK-GI-NEXT: ret
%f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #255 // =0xff
; CHECK-NEXT: ret
%f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #9
; CHECK-NEXT: ret
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #9
; CHECK-NEXT: ret
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x0, x1, #41
; CHECK-NEXT: ret
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-SD-LABEL: fshr_i8_const_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w0, #254 // =0xfe
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fshr_i8_const_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w0, #-2 // =0xfffffffe
; CHECK-GI-NEXT: ret
%f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
ret <4 x i32> %f
}

define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_shl_fshl:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, w2
; CHECK-SD-NEXT: lsr w9, w1, #1
; CHECK-SD-NEXT: lsl w10, w1, w2
; CHECK-SD-NEXT: mvn w11, w2
; CHECK-SD-NEXT: lsl w8, w0, w8
; CHECK-SD-NEXT: lsr w9, w9, w11
; CHECK-SD-NEXT: orr w8, w8, w10
; CHECK-SD-NEXT: orr w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_shl_fshl:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: and w9, w2, #0x1f
; CHECK-GI-NEXT: lsr w10, w1, #1
; CHECK-GI-NEXT: lsl w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsl w9, w0, w9
; CHECK-GI-NEXT: lsr w8, w10, w8
; CHECK-GI-NEXT: orr w9, w9, w11
; CHECK-GI-NEXT: orr w0, w9, w8
; CHECK-GI-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
%or = or i32 %fun, %shy
ret i32 %or
}

define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) {
; CHECK-LABEL: or_shl_rotl:
; CHECK: // %bb.0:
; CHECK-NEXT: neg w8, w2
; CHECK-NEXT: lsl w9, w0, w2
; CHECK-NEXT: ror w8, w1, w8
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
%shx = shl i32 %x, %s
%rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
%or = or i32 %rot, %shx
ret i32 %or
}

define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_shl_fshl_commute:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, w2
; CHECK-SD-NEXT: lsr w9, w1, #1
; CHECK-SD-NEXT: lsl w10, w1, w2
; CHECK-SD-NEXT: mvn w11, w2
; CHECK-SD-NEXT: lsl w8, w0, w8
; CHECK-SD-NEXT: lsr w9, w9, w11
; CHECK-SD-NEXT: orr w8, w10, w8
; CHECK-SD-NEXT: orr w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_shl_fshl_commute:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: and w9, w2, #0x1f
; CHECK-GI-NEXT: lsr w10, w1, #1
; CHECK-GI-NEXT: lsl w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsl w9, w0, w9
; CHECK-GI-NEXT: lsr w8, w10, w8
; CHECK-GI-NEXT: orr w9, w11, w9
; CHECK-GI-NEXT: orr w0, w9, w8
; CHECK-GI-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
%or = or i32 %shy, %fun
ret i32 %or
}

define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) {
; CHECK-LABEL: or_shl_rotl_commute:
; CHECK: // %bb.0:
; CHECK-NEXT: neg w8, w2
; CHECK-NEXT: lsl w9, w0, w2
; CHECK-NEXT: ror w8, w1, w8
; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%shx = shl i32 %x, %s
%rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
%or = or i32 %shx, %rot
ret i32 %or
}

define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_lshr_fshr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, w2
; CHECK-SD-NEXT: lsl w9, w1, #1
; CHECK-SD-NEXT: lsr w10, w1, w2
; CHECK-SD-NEXT: lsr w8, w0, w8
; CHECK-SD-NEXT: mvn w11, w2
; CHECK-SD-NEXT: lsl w9, w9, w11
; CHECK-SD-NEXT: orr w8, w8, w10
; CHECK-SD-NEXT: orr w0, w9, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_lshr_fshr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: and w9, w2, #0x1f
; CHECK-GI-NEXT: lsl w10, w1, #1
; CHECK-GI-NEXT: lsr w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsr w9, w0, w9
; CHECK-GI-NEXT: lsl w8, w10, w8
; CHECK-GI-NEXT: orr w9, w9, w11
; CHECK-GI-NEXT: orr w0, w8, w9
; CHECK-GI-NEXT: ret
%shy = lshr i32 %y, %s
%fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
%or = or i32 %fun, %shy
ret i32 %or
}

define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) {
; CHECK-LABEL: or_lshr_rotr:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, w2
; CHECK-NEXT: ror w9, w1, w2
; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%shx = lshr i32 %x, %s
%rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
%or = or i32 %rot, %shx
ret i32 %or
}

define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_lshr_fshr_commute:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, w2
; CHECK-SD-NEXT: lsl w9, w1, #1
; CHECK-SD-NEXT: lsr w10, w1, w2
; CHECK-SD-NEXT: lsr w8, w0, w8
; CHECK-SD-NEXT: mvn w11, w2
; CHECK-SD-NEXT: lsl w9, w9, w11
; CHECK-SD-NEXT: orr w8, w10, w8
; CHECK-SD-NEXT: orr w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_lshr_fshr_commute:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: and w9, w2, #0x1f
; CHECK-GI-NEXT: lsl w10, w1, #1
; CHECK-GI-NEXT: lsr w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsr w9, w0, w9
; CHECK-GI-NEXT: lsl w8, w10, w8
; CHECK-GI-NEXT: orr w9, w11, w9
; CHECK-GI-NEXT: orr w0, w9, w8
; CHECK-GI-NEXT: ret
%shy = lshr i32 %y, %s
%fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
%or = or i32 %shy, %fun
ret i32 %or
}

define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) {
; CHECK-LABEL: or_lshr_rotr_commute:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, w2
; CHECK-NEXT: ror w9, w1, w2
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
%shx = lshr i32 %x, %s
%rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
%or = or i32 %shx, %rot
ret i32 %or
}

define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_shl_fshl_simplify:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr w8, w0, #1
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: lsl w10, w1, w2
; CHECK-SD-NEXT: lsr w8, w8, w9
; CHECK-SD-NEXT: orr w0, w10, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_shl_fshl_simplify:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: and w9, w2, #0x1f
; CHECK-GI-NEXT: lsr w10, w0, #1
; CHECK-GI-NEXT: lsl w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsl w9, w1, w9
; CHECK-GI-NEXT: lsr w8, w10, w8
; CHECK-GI-NEXT: orr w9, w9, w11
; CHECK-GI-NEXT: orr w0, w9, w8
; CHECK-GI-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
%or = or i32 %fun, %shy
ret i32 %or
}

define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_lshr_fshr_simplify:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsl w8, w0, #1
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: lsr w10, w1, w2
; CHECK-SD-NEXT: lsl w8, w8, w9
; CHECK-SD-NEXT: orr w0, w8, w10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_lshr_fshr_simplify:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: and w9, w2, #0x1f
; CHECK-GI-NEXT: lsl w10, w0, #1
; CHECK-GI-NEXT: lsr w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
; CHECK-GI-NEXT: lsr w9, w1, w9
; CHECK-GI-NEXT: lsl w8, w10, w8
; CHECK-GI-NEXT: orr w9, w11, w9
; CHECK-GI-NEXT: orr w0, w9, w8
; CHECK-GI-NEXT: ret
%shy = lshr i32 %y, %s
%fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
%or = or i32 %shy, %fun
ret i32 %or
}