In the register allocator we define non-trivial rematerialization as the rematerialization of an instruction with virtual register uses. We have been able to perform non-trivial rematerialization for a while, but it has been prevented by default unless specifically overridden by the target in `TargetInstrInfo::isReMaterializableImpl`. The original reasoning for this, given by the comment in the default implementation, is that we might increase the live range of a virtual register, but we don't actually do this: `LiveRangeEdit::allUsesAvailableAt` makes sure that we only rematerialize instructions whose virtual registers are already live at the use sites. https://reviews.llvm.org/D106408 had originally tried to remove this restriction, but it was reverted after some performance regressions were reported. We think it is likely that the regressions were caused by the fact that the old isTriviallyReMaterializable API sometimes returned true for non-trivial rematerializations. However, https://github.com/llvm/llvm-project/pull/160377 recently split the API out into separate non-trivial and trivial versions and updated the call sites accordingly, and https://github.com/llvm/llvm-project/pull/160709 and #159180 fixed heuristics which weren't accounting for the difference between non-trivial and trivial. With these fixes in place, this patch proposes to again allow non-trivial rematerialization by default, which significantly reduces the number of spills and reloads across various targets. For llvm-test-suite built with -O3 -flto, we get the following geomean reduction in reloads: - arm64-apple-darwin: 11.6% - riscv64-linux-gnu: 8.1% - x86_64-linux-gnu: 6.5%
4592 lines
129 KiB
LLVM
4592 lines
129 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M

; Patterns:
; a) (x >> start) & (1 << nbits) - 1
; b) (x >> start) & ~(-1 << nbits)
; c) (x >> start) & (-1 >> (32 - y))
; d) (x >> start) << (32 - y) >> (32 - y)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern a on i32, baseline form: logically shift %val right by %numskipbits,
; then AND it with the mask (1 << %numlowbits) - 1. The mask is the first
; operand of the 'and'.
define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_a0:
; V7M: @ %bb.0:
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: movs r1, #1
; V7M-NEXT: lsls r1, r2
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_a0:
; V7A: @ %bb.0:
; V7A-NEXT: mov r12, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r2, r3, r12, lsl r2
; V7A-NEXT: and r0, r2, r0, lsr r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_a0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: lsls r1, r2
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_a0:
; V6M: @ %bb.0:
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r2
; V6M-NEXT: subs r1, r1, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %shifted = lshr i32 %val, %numskipbits
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Pattern a on i32 with an arithmetic shift: same as the baseline form, except
; that %val is shifted right with 'ashr' instead of 'lshr' before it is ANDed
; with the mask (1 << %numlowbits) - 1.
define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_a0_arithmetic:
; V7M: @ %bb.0:
; V7M-NEXT: asrs r0, r1
; V7M-NEXT: movs r1, #1
; V7M-NEXT: lsls r1, r2
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_a0_arithmetic:
; V7A: @ %bb.0:
; V7A-NEXT: mov r12, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r2, r3, r12, lsl r2
; V7A-NEXT: and r0, r2, r0, asr r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_a0_arithmetic:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: asrs r0, r1
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: lsls r1, r2
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_a0_arithmetic:
; V6M: @ %bb.0:
; V6M-NEXT: asrs r0, r1
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r2
; V6M-NEXT: subs r1, r1, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %shifted = ashr i32 %val, %numskipbits
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Pattern a on i32 with narrow shift amounts: %numskipbits and %numlowbits are
; 'zeroext' i8 arguments that are zero-extended to i32 before being used as the
; shift amounts for the logical right shift and the mask computation.
define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bextr32_a1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: movs r1, #1
; V7M-NEXT: lsls r1, r2
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_a1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mov r12, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r2, r3, r12, lsl r2
; V7A-NEXT: and r0, r2, r0, lsr r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_a1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: lsls r1, r2
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_a1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r2
; V6M-NEXT: subs r1, r1, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %skip = zext i8 %numskipbits to i32
  %shifted = lshr i32 %val, %skip
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Pattern a on i32 with a memory source: the value to extract from is loaded
; from %w, then logically shifted right by %numskipbits and ANDed with the
; mask (1 << %numlowbits) - 1.
define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_a2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: movs r1, #1
; V7M-NEXT: lsls r1, r2
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_a2_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r12, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r2, r3, r12, lsl r2
; V7A-NEXT: and r0, r2, r0, lsr r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_a2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: lsls r1, r2
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_a2_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r3, [r0]
; V6M-NEXT: lsrs r3, r1
; V6M-NEXT: movs r0, #1
; V6M-NEXT: lsls r0, r2
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: ands r0, r3
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %shifted = lshr i32 %val, %numskipbits
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Pattern a on i32 combining the memory-source and narrow-index variants: the
; value is loaded from %w, and both shift amounts are 'zeroext' i8 arguments
; zero-extended to i32 before use.
define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bextr32_a3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: movs r1, #1
; V7M-NEXT: lsls r1, r2
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_a3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r12, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r2, r3, r12, lsl r2
; V7A-NEXT: and r0, r2, r0, lsr r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_a3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: lsls r1, r2
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_a3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r3, [r0]
; V6M-NEXT: lsrs r3, r1
; V6M-NEXT: movs r0, #1
; V6M-NEXT: lsls r0, r2
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: ands r0, r3
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %skip = zext i8 %numskipbits to i32
  %shifted = lshr i32 %val, %skip
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Pattern a on i32 with the 'and' operands commuted: identical to the baseline
; form except that the shifted value is the first operand of the 'and' and the
; mask (1 << %numlowbits) - 1 is the second.
define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_a4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: movs r1, #1
; V7M-NEXT: lsls r1, r2
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_a4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: mov r12, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r2, r3, r12, lsl r2
; V7A-NEXT: and r0, r2, r0, lsr r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_a4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: lsls r1, r2
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_a4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r2
; V6M-NEXT: subs r1, r1, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %shifted = lshr i32 %val, %numskipbits
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %shifted, %mask ; swapped order
  ret i32 %masked
}

; 64-bit

; Pattern a on i64, baseline form: logically shift %val right by %numskipbits,
; then AND it with the mask (1 << %numlowbits) - 1. The armv6m expectations
; perform the two 64-bit shifts via calls to __aeabi_llsl and __aeabi_llsr.
define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_a0:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r4, lr}
; V7M-NEXT: push {r4, lr}
; V7M-NEXT: ldr.w r12, [sp, #8]
; V7M-NEXT: mov.w lr, #1
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: rsb.w r4, r12, #32
; V7M-NEXT: subs.w r3, r12, #32
; V7M-NEXT: lsr.w r4, lr, r4
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r4, lr, r3
; V7M-NEXT: lsl.w r3, lr, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: subs r3, #1
; V7M-NEXT: sbc r12, r4, #0
; V7M-NEXT: rsb.w r4, r2, #32
; V7M-NEXT: lsl.w r4, r1, r4
; V7M-NEXT: orrs r0, r4
; V7M-NEXT: subs.w r4, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r4
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: and.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: and.w r1, r1, r12
; V7M-NEXT: pop {r4, pc}
;
; V7A-LABEL: bextr64_a0:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r11, lr}
; V7A-NEXT: push {r4, r5, r11, lr}
; V7A-NEXT: ldr lr, [sp, #16]
; V7A-NEXT: mov r5, #1
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: rsb r12, lr, #32
; V7A-NEXT: subs r4, lr, #32
; V7A-NEXT: lsr r3, r5, r12
; V7A-NEXT: lslpl r3, r5, r4
; V7A-NEXT: lsl r5, r5, lr
; V7A-NEXT: movwpl r5, #0
; V7A-NEXT: rsb r4, r2, #32
; V7A-NEXT: subs r5, r5, #1
; V7A-NEXT: sbc r3, r3, #0
; V7A-NEXT: orr r0, r0, r1, lsl r4
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: lsrpl r0, r1, r4
; V7A-NEXT: lsr r1, r1, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: and r0, r5, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r4, r5, r11, pc}
;
; V7A-T-LABEL: bextr64_a0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: ldr.w r12, [sp, #8]
; V7A-T-NEXT: mov.w lr, #1
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: rsb.w r4, r12, #32
; V7A-T-NEXT: subs.w r3, r12, #32
; V7A-T-NEXT: lsr.w r4, lr, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r4, lr, r3
; V7A-T-NEXT: lsl.w r3, lr, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: subs r3, #1
; V7A-T-NEXT: sbc r12, r4, #0
; V7A-T-NEXT: rsb.w r4, r2, #32
; V7A-T-NEXT: lsl.w r4, r1, r4
; V7A-T-NEXT: orrs r0, r4
; V7A-T-NEXT: subs.w r4, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r4
; V7A-T-NEXT: lsr.w r1, r1, r2
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: and.w r1, r1, r12
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_a0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, r7, lr}
; V6M-NEXT: push {r4, r5, r6, r7, lr}
; V6M-NEXT: .pad #12
; V6M-NEXT: sub sp, #12
; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
; V6M-NEXT: mov r6, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r7, #0
; V6M-NEXT: ldr r2, [sp, #32]
; V6M-NEXT: mov r1, r7
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r4, r1
; V6M-NEXT: subs r5, r0, #1
; V6M-NEXT: sbcs r4, r7
; V6M-NEXT: mov r0, r6
; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: add sp, #12
; V6M-NEXT: pop {r4, r5, r6, r7, pc}
  %shifted = lshr i64 %val, %numskipbits
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with an arithmetic shift: %val is shifted right with 'ashr'
; instead of 'lshr' before being ANDed with the mask (1 << %numlowbits) - 1.
; The armv6m expectations call __aeabi_llsl for the mask and __aeabi_lasr for
; the value shift.
define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_a0_arithmetic:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r4, lr}
; V7M-NEXT: push {r4, lr}
; V7M-NEXT: ldr.w r12, [sp, #8]
; V7M-NEXT: mov.w lr, #1
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: rsb.w r4, r12, #32
; V7M-NEXT: subs.w r3, r12, #32
; V7M-NEXT: lsr.w r4, lr, r4
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r4, lr, r3
; V7M-NEXT: lsl.w r3, lr, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: subs r3, #1
; V7M-NEXT: sbc r12, r4, #0
; V7M-NEXT: rsb.w r4, r2, #32
; V7M-NEXT: lsl.w r4, r1, r4
; V7M-NEXT: orrs r0, r4
; V7M-NEXT: subs.w r4, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: asrpl.w r0, r1, r4
; V7M-NEXT: asr.w r2, r1, r2
; V7M-NEXT: and.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: asrpl r2, r1, #31
; V7M-NEXT: and.w r1, r12, r2
; V7M-NEXT: pop {r4, pc}
;
; V7A-LABEL: bextr64_a0_arithmetic:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r11, lr}
; V7A-NEXT: push {r4, r5, r11, lr}
; V7A-NEXT: ldr lr, [sp, #16]
; V7A-NEXT: mov r5, #1
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: rsb r12, lr, #32
; V7A-NEXT: subs r4, lr, #32
; V7A-NEXT: lsr r3, r5, r12
; V7A-NEXT: lslpl r3, r5, r4
; V7A-NEXT: lsl r5, r5, lr
; V7A-NEXT: movwpl r5, #0
; V7A-NEXT: rsb r4, r2, #32
; V7A-NEXT: subs r5, r5, #1
; V7A-NEXT: sbc r3, r3, #0
; V7A-NEXT: orr r0, r0, r1, lsl r4
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: asr r2, r1, r2
; V7A-NEXT: asrpl r2, r1, #31
; V7A-NEXT: asrpl r0, r1, r4
; V7A-NEXT: and r1, r3, r2
; V7A-NEXT: and r0, r5, r0
; V7A-NEXT: pop {r4, r5, r11, pc}
;
; V7A-T-LABEL: bextr64_a0_arithmetic:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: ldr.w r12, [sp, #8]
; V7A-T-NEXT: mov.w lr, #1
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: rsb.w r4, r12, #32
; V7A-T-NEXT: subs.w r3, r12, #32
; V7A-T-NEXT: lsr.w r4, lr, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r4, lr, r3
; V7A-T-NEXT: lsl.w r3, lr, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: subs r3, #1
; V7A-T-NEXT: sbc r12, r4, #0
; V7A-T-NEXT: rsb.w r4, r2, #32
; V7A-T-NEXT: lsl.w r4, r1, r4
; V7A-T-NEXT: orrs r0, r4
; V7A-T-NEXT: subs.w r4, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: asrpl.w r0, r1, r4
; V7A-T-NEXT: asr.w r2, r1, r2
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: asrpl r2, r1, #31
; V7A-T-NEXT: and.w r1, r12, r2
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_a0_arithmetic:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, r7, lr}
; V6M-NEXT: push {r4, r5, r6, r7, lr}
; V6M-NEXT: .pad #12
; V6M-NEXT: sub sp, #12
; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
; V6M-NEXT: mov r6, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r7, #0
; V6M-NEXT: ldr r2, [sp, #32]
; V6M-NEXT: mov r1, r7
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r4, r1
; V6M-NEXT: subs r5, r0, #1
; V6M-NEXT: sbcs r4, r7
; V6M-NEXT: mov r0, r6
; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; V6M-NEXT: bl __aeabi_lasr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: add sp, #12
; V6M-NEXT: pop {r4, r5, r6, r7, pc}
  %shifted = ashr i64 %val, %numskipbits
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with narrow shift amounts: %numskipbits and %numlowbits are
; 'zeroext' i8 arguments that are zero-extended to i64 before being used as the
; shift amounts for the logical right shift and the mask computation.
define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bextr64_a1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r4, lr}
; V7M-NEXT: push {r4, lr}
; V7M-NEXT: rsb.w r4, r3, #32
; V7M-NEXT: mov.w lr, #1
; V7M-NEXT: subs.w r12, r3, #32
; V7M-NEXT: lsl.w r3, lr, r3
; V7M-NEXT: lsr.w r4, lr, r4
; V7M-NEXT: lsr.w r0, r0, r2
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r4, lr, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: subs r3, #1
; V7M-NEXT: sbc r12, r4, #0
; V7M-NEXT: rsb.w r4, r2, #32
; V7M-NEXT: lsl.w r4, r1, r4
; V7M-NEXT: orrs r0, r4
; V7M-NEXT: subs.w r4, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r4
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: and.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: and.w r1, r1, r12
; V7M-NEXT: pop {r4, pc}
;
; V7A-LABEL: bextr64_a1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, lr}
; V7A-NEXT: push {r4, lr}
; V7A-NEXT: rsb r12, r3, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r4, r3, #32
; V7A-NEXT: lsl r3, lr, r3
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r3, #0
; V7A-NEXT: lslpl r12, lr, r4
; V7A-NEXT: rsb r4, r2, #32
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: subs r3, r3, #1
; V7A-NEXT: sbc r12, r12, #0
; V7A-NEXT: orr r0, r0, r1, lsl r4
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: lsrpl r0, r1, r4
; V7A-NEXT: lsr r1, r1, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: and r1, r12, r1
; V7A-NEXT: pop {r4, pc}
;
; V7A-T-LABEL: bextr64_a1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: rsb.w r4, r3, #32
; V7A-T-NEXT: mov.w lr, #1
; V7A-T-NEXT: subs.w r12, r3, #32
; V7A-T-NEXT: lsl.w r3, lr, r3
; V7A-T-NEXT: lsr.w r4, lr, r4
; V7A-T-NEXT: lsr.w r0, r0, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r4, lr, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: subs r3, #1
; V7A-T-NEXT: sbc r12, r4, #0
; V7A-T-NEXT: rsb.w r4, r2, #32
; V7A-T-NEXT: lsl.w r4, r1, r4
; V7A-T-NEXT: orrs r0, r4
; V7A-T-NEXT: subs.w r4, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r4
; V7A-T-NEXT: lsr.w r1, r1, r2
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: and.w r1, r1, r12
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_a1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, r7, lr}
; V6M-NEXT: push {r4, r5, r6, r7, lr}
; V6M-NEXT: .pad #12
; V6M-NEXT: sub sp, #12
; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
; V6M-NEXT: mov r6, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r7, #0
; V6M-NEXT: mov r1, r7
; V6M-NEXT: mov r2, r3
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r4, r1
; V6M-NEXT: subs r5, r0, #1
; V6M-NEXT: sbcs r4, r7
; V6M-NEXT: mov r0, r6
; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: add sp, #12
; V6M-NEXT: pop {r4, r5, r6, r7, pc}
  %skip = zext i8 %numskipbits to i64
  %shifted = lshr i64 %val, %skip
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with a memory source: the value to extract from is loaded
; from %w, then logically shifted right by %numskipbits and ANDed with the
; mask (1 << %numlowbits) - 1.
define i64 @bextr64_a2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_a2_load:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: ldr.w r12, [sp, #8]
; V7M-NEXT: mov.w lr, #1
; V7M-NEXT: rsb.w r1, r12, #32
; V7M-NEXT: subs.w r3, r12, #32
; V7M-NEXT: lsr.w r1, lr, r1
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, lr, r3
; V7M-NEXT: lsl.w r3, lr, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: subs.w lr, r3, #1
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: sbc r12, r1, #0
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: lsl.w r1, r3, r1
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: orrs r0, r1
; V7M-NEXT: subs.w r1, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r3, r1
; V7M-NEXT: lsr.w r1, r3, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: and.w r0, r0, lr
; V7M-NEXT: and.w r1, r1, r12
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bextr64_a2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r6, lr}
; V7A-NEXT: push {r4, r5, r6, lr}
; V7A-NEXT: ldr r1, [sp, #16]
; V7A-NEXT: mov r3, #1
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: ldr r5, [r0, #4]
; V7A-NEXT: rsb r0, r1, #32
; V7A-NEXT: subs r4, r1, #32
; V7A-NEXT: lsl r1, r3, r1
; V7A-NEXT: lsr r0, r3, r0
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: lslpl r0, r3, r4
; V7A-NEXT: subs r1, r1, #1
; V7A-NEXT: sbc r3, r0, #0
; V7A-NEXT: lsr r0, r6, r2
; V7A-NEXT: rsb r6, r2, #32
; V7A-NEXT: orr r0, r0, r5, lsl r6
; V7A-NEXT: subs r6, r2, #32
; V7A-NEXT: lsrpl r0, r5, r6
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: lsr r1, r5, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r4, r5, r6, pc}
;
; V7A-T-LABEL: bextr64_a2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: ldr.w r12, [sp, #8]
; V7A-T-NEXT: movs r3, #1
; V7A-T-NEXT: ldrd lr, r1, [r0]
; V7A-T-NEXT: rsb.w r4, r12, #32
; V7A-T-NEXT: subs.w r0, r12, #32
; V7A-T-NEXT: lsr.w r4, r3, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r4, r3, r0
; V7A-T-NEXT: lsl.w r0, r3, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsr.w r3, lr, r2
; V7A-T-NEXT: subs r0, #1
; V7A-T-NEXT: sbc r12, r4, #0
; V7A-T-NEXT: rsb.w r4, r2, #32
; V7A-T-NEXT: lsl.w r4, r1, r4
; V7A-T-NEXT: orrs r3, r4
; V7A-T-NEXT: subs.w r4, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r3, r1, r4
; V7A-T-NEXT: lsr.w r1, r1, r2
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: and.w r1, r1, r12
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_a2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, r7, lr}
; V6M-NEXT: push {r4, r5, r6, r7, lr}
; V6M-NEXT: .pad #4
; V6M-NEXT: sub sp, #4
; V6M-NEXT: str r2, [sp] @ 4-byte Spill
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r7, #0
; V6M-NEXT: ldr r2, [sp, #24]
; V6M-NEXT: mov r1, r7
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r6, r1
; V6M-NEXT: subs r4, r0, #1
; V6M-NEXT: sbcs r6, r7
; V6M-NEXT: ldm r5!, {r0, r1}
; V6M-NEXT: ldr r2, [sp] @ 4-byte Reload
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r4
; V6M-NEXT: ands r1, r6
; V6M-NEXT: add sp, #4
; V6M-NEXT: pop {r4, r5, r6, r7, pc}
  %val = load i64, ptr %w
  %shifted = lshr i64 %val, %numskipbits
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 combining the memory-source and narrow-index variants: the
; value is loaded from %w, and both shift amounts are 'zeroext' i8 arguments
; zero-extended to i64 before use.
define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bextr64_a3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs.w lr, r2, #1
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: sbc r12, r3, #0
; V7M-NEXT: rsb.w r3, r1, #32
; V7M-NEXT: lsl.w r3, r2, r3
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: orrs r0, r3
; V7M-NEXT: subs.w r3, r1, #32
; V7M-NEXT: lsr.w r1, r2, r1
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r2, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: and.w r0, r0, lr
; V7M-NEXT: and.w r1, r1, r12
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bextr64_a3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r6, lr}
; V7A-NEXT: push {r4, r5, r6, lr}
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: mov r3, #1
; V7A-NEXT: ldr r5, [r0, #4]
; V7A-NEXT: rsb r0, r2, #32
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: lsl r2, r3, r2
; V7A-NEXT: lsr r0, r3, r0
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r0, r3, r4
; V7A-NEXT: subs r3, r2, #1
; V7A-NEXT: sbc r0, r0, #0
; V7A-NEXT: lsr r2, r5, r1
; V7A-NEXT: subs r4, r1, #32
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: and r2, r0, r2
; V7A-NEXT: lsr r0, r6, r1
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: orr r0, r0, r5, lsl r1
; V7A-NEXT: mov r1, r2
; V7A-NEXT: lsrpl r0, r5, r4
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: pop {r4, r5, r6, pc}
;
; V7A-T-LABEL: bextr64_a3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: rsb.w r4, r2, #32
; V7A-T-NEXT: mov.w lr, #1
; V7A-T-NEXT: subs.w r3, r2, #32
; V7A-T-NEXT: lsl.w r2, lr, r2
; V7A-T-NEXT: lsr.w r4, lr, r4
; V7A-T-NEXT: ldrd r12, r0, [r0]
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r4, lr, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs.w lr, r2, #1
; V7A-T-NEXT: sbc r2, r4, #0
; V7A-T-NEXT: lsr.w r4, r0, r1
; V7A-T-NEXT: subs.w r3, r1, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r4, #0
; V7A-T-NEXT: and.w r2, r2, r4
; V7A-T-NEXT: rsb.w r4, r1, #32
; V7A-T-NEXT: lsr.w r1, r12, r1
; V7A-T-NEXT: lsl.w r4, r0, r4
; V7A-T-NEXT: orr.w r1, r1, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r1, r0, r3
; V7A-T-NEXT: and.w r0, lr, r1
; V7A-T-NEXT: mov r1, r2
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_a3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, r7, lr}
; V6M-NEXT: push {r4, r5, r6, r7, lr}
; V6M-NEXT: .pad #4
; V6M-NEXT: sub sp, #4
; V6M-NEXT: str r1, [sp] @ 4-byte Spill
; V6M-NEXT: mov r6, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r7, #0
; V6M-NEXT: mov r1, r7
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r5, r1
; V6M-NEXT: subs r4, r0, #1
; V6M-NEXT: sbcs r5, r7
; V6M-NEXT: ldm r6!, {r0, r1}
; V6M-NEXT: ldr r2, [sp] @ 4-byte Reload
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r4
; V6M-NEXT: ands r1, r5
; V6M-NEXT: add sp, #4
; V6M-NEXT: pop {r4, r5, r6, r7, pc}
  %val = load i64, ptr %w
  %skip = zext i8 %numskipbits to i64
  %shifted = lshr i64 %val, %skip
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with the 'and' operands commuted: identical to the baseline
; i64 form except that the shifted value is the first operand of the 'and' and
; the mask (1 << %numlowbits) - 1 is the second.
define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_a4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r4, lr}
; V7M-NEXT: push {r4, lr}
; V7M-NEXT: ldr.w r12, [sp, #8]
; V7M-NEXT: mov.w lr, #1
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: rsb.w r4, r12, #32
; V7M-NEXT: subs.w r3, r12, #32
; V7M-NEXT: lsr.w r4, lr, r4
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r4, lr, r3
; V7M-NEXT: lsl.w r3, lr, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: subs r3, #1
; V7M-NEXT: sbc r12, r4, #0
; V7M-NEXT: rsb.w r4, r2, #32
; V7M-NEXT: lsl.w r4, r1, r4
; V7M-NEXT: orrs r0, r4
; V7M-NEXT: subs.w r4, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r4
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: and.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: and.w r1, r1, r12
; V7M-NEXT: pop {r4, pc}
;
; V7A-LABEL: bextr64_a4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r11, lr}
; V7A-NEXT: push {r4, r5, r11, lr}
; V7A-NEXT: ldr lr, [sp, #16]
; V7A-NEXT: mov r5, #1
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: rsb r12, lr, #32
; V7A-NEXT: subs r4, lr, #32
; V7A-NEXT: lsr r3, r5, r12
; V7A-NEXT: lslpl r3, r5, r4
; V7A-NEXT: lsl r5, r5, lr
; V7A-NEXT: movwpl r5, #0
; V7A-NEXT: rsb r4, r2, #32
; V7A-NEXT: subs r5, r5, #1
; V7A-NEXT: sbc r3, r3, #0
; V7A-NEXT: orr r0, r0, r1, lsl r4
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: lsrpl r0, r1, r4
; V7A-NEXT: lsr r1, r1, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: and r0, r0, r5
; V7A-NEXT: and r1, r1, r3
; V7A-NEXT: pop {r4, r5, r11, pc}
;
; V7A-T-LABEL: bextr64_a4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: ldr.w r12, [sp, #8]
; V7A-T-NEXT: mov.w lr, #1
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: rsb.w r4, r12, #32
; V7A-T-NEXT: subs.w r3, r12, #32
; V7A-T-NEXT: lsr.w r4, lr, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r4, lr, r3
; V7A-T-NEXT: lsl.w r3, lr, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: subs r3, #1
; V7A-T-NEXT: sbc r12, r4, #0
; V7A-T-NEXT: rsb.w r4, r2, #32
; V7A-T-NEXT: lsl.w r4, r1, r4
; V7A-T-NEXT: orrs r0, r4
; V7A-T-NEXT: subs.w r4, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r4
; V7A-T-NEXT: lsr.w r1, r1, r2
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: and.w r1, r1, r12
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_a4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, r7, lr}
; V6M-NEXT: push {r4, r5, r6, r7, lr}
; V6M-NEXT: .pad #12
; V6M-NEXT: sub sp, #12
; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
; V6M-NEXT: mov r6, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r7, #0
; V6M-NEXT: ldr r2, [sp, #32]
; V6M-NEXT: mov r1, r7
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r4, r1
; V6M-NEXT: subs r5, r0, #1
; V6M-NEXT: sbcs r4, r7
; V6M-NEXT: mov r0, r6
; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: add sp, #12
; V6M-NEXT: pop {r4, r5, r6, r7, pc}
  %shifted = lshr i64 %val, %numskipbits
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %shifted, %mask ; swapped order
  ret i64 %masked
}
|
|
|
|
; 64-bit, but with 32-bit output
|
|
|
|
; Everything done in 64-bit, truncation happens last.
|
|
; Bit-field extract from a 64-bit value, returning the low 32 bits:
;   trunc((%val >> %numskipbits) & ((1 << %numlowbits) - 1))
; The logical shift, the mask construction (shl + add -1) and the 'and' are
; all performed on i64; the trunc to i32 is the last operation.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_a0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: ldr r1, [sp]
|
|
; V7M-NEXT: movs r2, #1
|
|
; V7M-NEXT: lsls r2, r1
|
|
; V7M-NEXT: subs r1, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: subs r1, r2, #1
|
|
; V7M-NEXT: ands r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_32_a0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: ldr r12, [sp]
|
|
; V7A-NEXT: subs r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: mov r1, #1
|
|
; V7A-NEXT: lsl r1, r1, r12
|
|
; V7A-NEXT: subs r2, r12, #32
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: sub r1, r1, #1
|
|
; V7A-NEXT: and r0, r1, r0
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_a0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: ldr.w r12, [sp]
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: movs r1, #1
|
|
; V7A-T-NEXT: lsl.w r1, r1, r12
|
|
; V7A-T-NEXT: subs.w r2, r12, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: subs r1, #1
|
|
; V7A-T-NEXT: ands r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_32_a0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: movs r0, #1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: ldr r2, [sp, #8]
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: subs r0, r0, #1
|
|
; V6M-NEXT: ands r0, r4
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%onebit = shl i64 1, %numlowbits
|
|
%mask = add nsw i64 %onebit, -1
|
|
%masked = and i64 %mask, %shifted
|
|
%res = trunc i64 %masked to i32
|
|
ret i32 %res
|
|
}
|
|
|
|
; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
|
|
; Bit-field extract where only the shift is 64-bit:
;   trunc(%val >> %numskipbits) & ((1 << %numlowbits) - 1)
; The i64 lshr result is truncated to i32 first; the mask (shl i32 1, then
; add -1) is built and applied entirely in i32. %numlowbits is an i32 here.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_a1:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: ldr r1, [sp]
|
|
; V7M-NEXT: movs r2, #1
|
|
; V7M-NEXT: lsl.w r1, r2, r1
|
|
; V7M-NEXT: subs r1, #1
|
|
; V7M-NEXT: ands r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_32_a1:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: mov lr, #1
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: add r12, r3, lr, lsl r12
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: subs r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: and r0, r12, r0
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_a1:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: ldr.w r12, [sp]
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: movs r1, #1
|
|
; V7A-T-NEXT: lsl.w r1, r1, r12
|
|
; V7A-T-NEXT: subs r1, #1
|
|
; V7A-T-NEXT: ands r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_32_a1:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r7, lr}
|
|
; V6M-NEXT: push {r7, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldr r1, [sp, #8]
|
|
; V6M-NEXT: movs r2, #1
|
|
; V6M-NEXT: lsls r2, r1
|
|
; V6M-NEXT: subs r1, r2, #1
|
|
; V6M-NEXT: ands r0, r1
|
|
; V6M-NEXT: pop {r7, pc}
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%truncshifted = trunc i64 %shifted to i32
|
|
%onebit = shl i32 1, %numlowbits
|
|
%mask = add nsw i32 %onebit, -1
|
|
%masked = and i32 %mask, %truncshifted
|
|
ret i32 %masked
|
|
}
|
|
|
|
; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
|
|
; Masking is 64-bit. Then truncation.
|
|
; Bit-field extract with a 32-bit mask widened to 64 bits:
;   trunc((%val >> %numskipbits) & zext((1 << %numlowbits) - 1))
; The mask is built in i32 (shl i32 1, then add -1), zero-extended to i64,
; applied to the i64 lshr result, and the masked value is truncated to i32.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_a2:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: ldr r1, [sp]
|
|
; V7M-NEXT: movs r2, #1
|
|
; V7M-NEXT: lsl.w r1, r2, r1
|
|
; V7M-NEXT: subs r1, #1
|
|
; V7M-NEXT: ands r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_32_a2:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: mov lr, #1
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: add r12, r3, lr, lsl r12
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: subs r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: and r0, r12, r0
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_a2:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: ldr.w r12, [sp]
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: movs r1, #1
|
|
; V7A-T-NEXT: lsl.w r1, r1, r12
|
|
; V7A-T-NEXT: subs r1, #1
|
|
; V7A-T-NEXT: ands r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_32_a2:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r7, lr}
|
|
; V6M-NEXT: push {r7, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldr r1, [sp, #8]
|
|
; V6M-NEXT: movs r2, #1
|
|
; V6M-NEXT: lsls r2, r1
|
|
; V6M-NEXT: subs r1, r2, #1
|
|
; V6M-NEXT: ands r0, r1
|
|
; V6M-NEXT: pop {r7, pc}
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%onebit = shl i32 1, %numlowbits
|
|
%mask = add nsw i32 %onebit, -1
|
|
%zextmask = zext i32 %mask to i64
|
|
%masked = and i64 %zextmask, %shifted
|
|
%truncmasked = trunc i64 %masked to i32
|
|
ret i32 %truncmasked
|
|
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern b: mask = ~(-1 << numlowbits). 32-bit
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; Baseline 32-bit bit-field extract using an inverted-shift mask:
;   (%val >> %numskipbits) & ~(-1 << %numlowbits)
; The mask is formed as shl i32 -1 followed by xor with -1; the mask is the
; first operand of the 'and'.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_b0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: lsl.w r2, r3, r2
|
|
; V7M-NEXT: bics r0, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_b0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1, lsl r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_b0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: lsl.w r2, r3, r2
|
|
; V7A-T-NEXT: bics r0, r2
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_b0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: mvns r1, r1
|
|
; V6M-NEXT: lsls r1, r2
|
|
; V6M-NEXT: bics r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%shifted = lshr i32 %val, %numskipbits
|
|
%notmask = shl i32 -1, %numlowbits
|
|
%mask = xor i32 %notmask, -1
|
|
%masked = and i32 %mask, %shifted
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 32-bit bit-field extract whose two bit counts arrive as zeroext i8 arguments:
;   (%val >> zext(%numskipbits)) & ~(-1 << zext(%numlowbits))
; Both counts are zero-extended to i32 in the IR before being used as shift
; amounts; the mask is shl i32 -1 followed by xor with -1.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_b1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: lsl.w r2, r3, r2
|
|
; V7M-NEXT: bics r0, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_b1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1, lsl r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_b1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: lsl.w r2, r3, r2
|
|
; V7A-T-NEXT: bics r0, r2
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_b1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: mvns r1, r1
|
|
; V6M-NEXT: lsls r1, r2
|
|
; V6M-NEXT: bics r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%skip = zext i8 %numskipbits to i32
|
|
%shifted = lshr i32 %val, %skip
|
|
%conv = zext i8 %numlowbits to i32
|
|
%notmask = shl i32 -1, %conv
|
|
%mask = xor i32 %notmask, -1
|
|
%masked = and i32 %mask, %shifted
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 32-bit bit-field extract where the source value is loaded from memory:
;   (load(%w) >> %numskipbits) & ~(-1 << %numlowbits)
; %w is dereferenced as an i32; the mask is shl i32 -1 followed by xor with -1.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_b2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsl.w r2, r3, r2
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bics r0, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_b2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1, lsl r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_b2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsl.w r2, r3, r2
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bics r0, r2
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_b2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r3, #0
|
|
; V6M-NEXT: mvns r3, r3
|
|
; V6M-NEXT: lsls r3, r2
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bics r0, r3
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w
|
|
%shifted = lshr i32 %val, %numskipbits
|
|
%notmask = shl i32 -1, %numlowbits
|
|
%mask = xor i32 %notmask, -1
|
|
%masked = and i32 %mask, %shifted
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 32-bit bit-field extract combining a loaded source with zeroext i8 counts:
;   (load(%w) >> zext(%numskipbits)) & ~(-1 << zext(%numlowbits))
; %w is dereferenced as an i32 and both counts are zero-extended to i32 in the
; IR; the mask is shl i32 -1 followed by xor with -1.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_b3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsl.w r2, r3, r2
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bics r0, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_b3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1, lsl r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_b3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsl.w r2, r3, r2
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bics r0, r2
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_b3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r3, #0
|
|
; V6M-NEXT: mvns r3, r3
|
|
; V6M-NEXT: lsls r3, r2
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bics r0, r3
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w
|
|
%skip = zext i8 %numskipbits to i32
|
|
%shifted = lshr i32 %val, %skip
|
|
%conv = zext i8 %numlowbits to i32
|
|
%notmask = shl i32 -1, %conv
|
|
%mask = xor i32 %notmask, -1
|
|
%masked = and i32 %mask, %shifted
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 32-bit bit-field extract with the 'and' operands in the opposite order:
;   (%val >> %numskipbits) & ~(-1 << %numlowbits)
; Same computation as the other 32-bit inverted-shift-mask tests, except the
; shifted value is the first 'and' operand and the mask the second.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_b4_commutative:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: lsl.w r2, r3, r2
|
|
; V7M-NEXT: bics r0, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_b4_commutative:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1, lsl r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_b4_commutative:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: lsl.w r2, r3, r2
|
|
; V7A-T-NEXT: bics r0, r2
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_b4_commutative:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: mvns r1, r1
|
|
; V6M-NEXT: lsls r1, r2
|
|
; V6M-NEXT: bics r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%shifted = lshr i32 %val, %numskipbits
|
|
%notmask = shl i32 -1, %numlowbits
|
|
%mask = xor i32 %notmask, -1
|
|
%masked = and i32 %shifted, %mask ; swapped order
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 64-bit
|
|
|
|
; Baseline 64-bit bit-field extract using an inverted-shift mask:
;   (%val >> %numskipbits) & ~(-1 << %numlowbits)
; All operations are on i64; the mask is shl i64 -1 followed by xor with -1,
; and the mask is the first operand of the 'and'.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_b0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: ldr.w r12, [sp, #8]
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orrs r0, r3
|
|
; V7M-NEXT: subs.w r3, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r3
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: lsl.w r3, r2, r12
|
|
; V7M-NEXT: subs.w lr, r12, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r2, r2, lr
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r3, #0
|
|
; V7M-NEXT: bics r1, r2
|
|
; V7M-NEXT: bics r0, r3
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_b0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: subs r3, r2, #32
|
|
; V7A-NEXT: lsrpl r0, r1, r3
|
|
; V7A-NEXT: lsr r1, r1, r2
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: subs lr, r12, #32
|
|
; V7A-NEXT: lsl r2, r3, r12
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: bic r0, r0, r2
|
|
; V7A-NEXT: lslpl r3, r3, lr
|
|
; V7A-NEXT: bic r1, r1, r3
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_b0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, r5, r7, lr}
|
|
; V7A-T-NEXT: push {r4, r5, r7, lr}
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: ldr.w r12, [sp, #16]
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r5, r0, r3
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: subs.w lr, r12, #32
|
|
; V7A-T-NEXT: lsl.w r0, r3, r12
|
|
; V7A-T-NEXT: itt pl
|
|
; V7A-T-NEXT: lslpl.w r3, r3, lr
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: subs.w r4, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r5, r1, r4
|
|
; V7A-T-NEXT: lsr.w r1, r1, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: bic.w r0, r5, r0
|
|
; V7A-T-NEXT: bics r1, r3
|
|
; V7A-T-NEXT: pop {r4, r5, r7, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_b0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: mov r5, r1
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: ldr r2, [sp, #16]
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r4, r0
|
|
; V6M-NEXT: bics r5, r1
|
|
; V6M-NEXT: mov r0, r4
|
|
; V6M-NEXT: mov r1, r5
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%notmask = shl i64 -1, %numlowbits
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %mask, %shifted
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit bit-field extract whose two bit counts arrive as zeroext i8 arguments:
;   (%val >> zext(%numskipbits)) & ~(-1 << zext(%numlowbits))
; Both counts are zero-extended to i64 in the IR before being used as shift
; amounts; the mask is shl i64 -1 followed by xor with -1.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_b1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: lsr.w r12, r0, r2
|
|
; V7M-NEXT: rsb.w r0, r2, #32
|
|
; V7M-NEXT: lsl.w r0, r1, r0
|
|
; V7M-NEXT: orr.w r12, r12, r0
|
|
; V7M-NEXT: subs.w r0, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r12, r1, r0
|
|
; V7M-NEXT: lsr.w r0, r1, r2
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: subs.w r1, r3, #32
|
|
; V7M-NEXT: lsl.w r3, r2, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl r2, r1
|
|
; V7M-NEXT: bic.w r1, r0, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r3, #0
|
|
; V7M-NEXT: bic.w r0, r12, r3
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_b1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: lsr r12, r0, r2
|
|
; V7A-NEXT: rsb r0, r2, #32
|
|
; V7A-NEXT: orr r12, r12, r1, lsl r0
|
|
; V7A-NEXT: subs r0, r2, #32
|
|
; V7A-NEXT: lsrpl r12, r1, r0
|
|
; V7A-NEXT: lsr r0, r1, r2
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: subs r1, r3, #32
|
|
; V7A-NEXT: mvn r2, #0
|
|
; V7A-NEXT: lsl r3, r2, r3
|
|
; V7A-NEXT: lslpl r2, r2, r1
|
|
; V7A-NEXT: bic r1, r0, r2
|
|
; V7A-NEXT: movwpl r3, #0
|
|
; V7A-NEXT: bic r0, r12, r3
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr64_b1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: lsr.w r12, r0, r2
|
|
; V7A-T-NEXT: rsb.w r0, r2, #32
|
|
; V7A-T-NEXT: lsl.w r0, r1, r0
|
|
; V7A-T-NEXT: orr.w r12, r12, r0
|
|
; V7A-T-NEXT: subs.w r0, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r12, r1, r0
|
|
; V7A-T-NEXT: lsr.w r0, r1, r2
|
|
; V7A-T-NEXT: mov.w r2, #-1
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: subs.w r1, r3, #32
|
|
; V7A-T-NEXT: lsl.w r3, r2, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl r2, r1
|
|
; V7A-T-NEXT: bic.w r1, r0, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r3, #0
|
|
; V7A-T-NEXT: bic.w r0, r12, r3
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_b1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r6, lr}
|
|
; V6M-NEXT: push {r4, r5, r6, lr}
|
|
; V6M-NEXT: mov r4, r3
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r5, r0
|
|
; V6M-NEXT: mov r6, r1
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r5, r0
|
|
; V6M-NEXT: bics r6, r1
|
|
; V6M-NEXT: mov r0, r5
|
|
; V6M-NEXT: mov r1, r6
|
|
; V6M-NEXT: pop {r4, r5, r6, pc}
|
|
%skip = zext i8 %numskipbits to i64
|
|
%shifted = lshr i64 %val, %skip
|
|
%conv = zext i8 %numlowbits to i64
|
|
%notmask = shl i64 -1, %conv
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %mask, %shifted
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit bit-field extract where the source value is loaded from memory:
;   (load(%w) >> %numskipbits) & ~(-1 << %numlowbits)
; %w is dereferenced as an i64; the mask is shl i64 -1 followed by xor with -1.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_b2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: ldrd r0, r3, [r0]
|
|
; V7M-NEXT: rsb.w r1, r2, #32
|
|
; V7M-NEXT: ldr.w r12, [sp, #8]
|
|
; V7M-NEXT: lsl.w r1, r3, r1
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: orrs r0, r1
|
|
; V7M-NEXT: subs.w r1, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r3, r1
|
|
; V7M-NEXT: lsr.w r1, r3, r2
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: lsl.w r3, r2, r12
|
|
; V7M-NEXT: subs.w lr, r12, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r2, r2, lr
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r3, #0
|
|
; V7M-NEXT: bics r1, r2
|
|
; V7M-NEXT: bics r0, r3
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_b2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: ldrd r0, r1, [r0]
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: subs r3, r2, #32
|
|
; V7A-NEXT: lsrpl r0, r1, r3
|
|
; V7A-NEXT: lsr r1, r1, r2
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: subs lr, r12, #32
|
|
; V7A-NEXT: lsl r2, r3, r12
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: bic r0, r0, r2
|
|
; V7A-NEXT: lslpl r3, r3, lr
|
|
; V7A-NEXT: bic r1, r1, r3
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_b2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: ldrd r0, r3, [r0]
|
|
; V7A-T-NEXT: rsb.w r1, r2, #32
|
|
; V7A-T-NEXT: ldr.w r12, [sp, #8]
|
|
; V7A-T-NEXT: lsl.w r1, r3, r1
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: orrs r0, r1
|
|
; V7A-T-NEXT: subs.w r1, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r3, r1
|
|
; V7A-T-NEXT: lsr.w r1, r3, r2
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: lsl.w r2, r3, r12
|
|
; V7A-T-NEXT: subs.w lr, r12, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r3, r3, lr
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r2, #0
|
|
; V7A-T-NEXT: bics r1, r3
|
|
; V7A-T-NEXT: bics r0, r2
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_b2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: ldr r3, [r0]
|
|
; V6M-NEXT: ldr r1, [r0, #4]
|
|
; V6M-NEXT: mov r0, r3
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: mov r5, r1
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: ldr r2, [sp, #16]
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r4, r0
|
|
; V6M-NEXT: bics r5, r1
|
|
; V6M-NEXT: mov r0, r4
|
|
; V6M-NEXT: mov r1, r5
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%val = load i64, ptr %w
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%notmask = shl i64 -1, %numlowbits
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %mask, %shifted
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit bit-field extract combining a loaded source with zeroext i8 counts:
;   (load(%w) >> zext(%numskipbits)) & ~(-1 << zext(%numlowbits))
; %w is dereferenced as an i64 and both counts are zero-extended to i64 in the
; IR; the mask is shl i64 -1 followed by xor with -1.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_b3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: ldrd r12, r0, [r0]
|
|
; V7M-NEXT: rsb.w r3, r1, #32
|
|
; V7M-NEXT: lsl.w lr, r0, r3
|
|
; V7M-NEXT: lsr.w r3, r12, r1
|
|
; V7M-NEXT: orr.w r12, r3, lr
|
|
; V7M-NEXT: subs.w r3, r1, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r12, r0, r3
|
|
; V7M-NEXT: lsr.w r0, r0, r1
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: subs.w r1, r2, #32
|
|
; V7M-NEXT: lsl.w r2, r3, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl r3, r1
|
|
; V7M-NEXT: bic.w r1, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: bic.w r0, r12, r2
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_b3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldm r0, {r0, r3}
|
|
; V7A-NEXT: lsr r12, r0, r1
|
|
; V7A-NEXT: rsb r0, r1, #32
|
|
; V7A-NEXT: orr r12, r12, r3, lsl r0
|
|
; V7A-NEXT: subs r0, r1, #32
|
|
; V7A-NEXT: lsrpl r12, r3, r0
|
|
; V7A-NEXT: lsr r0, r3, r1
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: subs r1, r2, #32
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: lsl r2, r3, r2
|
|
; V7A-NEXT: lslpl r3, r3, r1
|
|
; V7A-NEXT: bic r1, r0, r3
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: bic r0, r12, r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr64_b3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: ldrd r12, r3, [r0]
|
|
; V7A-T-NEXT: rsb.w r0, r1, #32
|
|
; V7A-T-NEXT: lsl.w lr, r3, r0
|
|
; V7A-T-NEXT: lsr.w r0, r12, r1
|
|
; V7A-T-NEXT: orr.w r12, r0, lr
|
|
; V7A-T-NEXT: subs.w r0, r1, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r12, r3, r0
|
|
; V7A-T-NEXT: lsr.w r0, r3, r1
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: subs.w r1, r2, #32
|
|
; V7A-T-NEXT: lsl.w r2, r3, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl r3, r1
|
|
; V7A-T-NEXT: bic.w r1, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r2, #0
|
|
; V7A-T-NEXT: bic.w r0, r12, r2
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_b3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r6, lr}
|
|
; V6M-NEXT: push {r4, r5, r6, lr}
|
|
; V6M-NEXT: mov r4, r2
|
|
; V6M-NEXT: mov r2, r1
|
|
; V6M-NEXT: ldr r3, [r0]
|
|
; V6M-NEXT: ldr r1, [r0, #4]
|
|
; V6M-NEXT: mov r0, r3
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r5, r0
|
|
; V6M-NEXT: mov r6, r1
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r5, r0
|
|
; V6M-NEXT: bics r6, r1
|
|
; V6M-NEXT: mov r0, r5
|
|
; V6M-NEXT: mov r1, r6
|
|
; V6M-NEXT: pop {r4, r5, r6, pc}
|
|
%val = load i64, ptr %w
|
|
%skip = zext i8 %numskipbits to i64
|
|
%shifted = lshr i64 %val, %skip
|
|
%conv = zext i8 %numlowbits to i64
|
|
%notmask = shl i64 -1, %conv
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %mask, %shifted
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit bit-field extract with the 'and' operands in the opposite order:
;   (%val >> %numskipbits) & ~(-1 << %numlowbits)
; Same computation as the other 64-bit inverted-shift-mask tests, except the
; shifted value is the first 'and' operand and the mask the second.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_b4_commutative:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: ldr.w r12, [sp, #8]
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orrs r0, r3
|
|
; V7M-NEXT: subs.w r3, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r3
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: lsl.w r3, r2, r12
|
|
; V7M-NEXT: subs.w lr, r12, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r2, r2, lr
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r3, #0
|
|
; V7M-NEXT: bics r1, r2
|
|
; V7M-NEXT: bics r0, r3
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_b4_commutative:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: subs r3, r2, #32
|
|
; V7A-NEXT: lsrpl r0, r1, r3
|
|
; V7A-NEXT: lsr r1, r1, r2
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: subs lr, r12, #32
|
|
; V7A-NEXT: lsl r2, r3, r12
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: bic r0, r0, r2
|
|
; V7A-NEXT: lslpl r3, r3, lr
|
|
; V7A-NEXT: bic r1, r1, r3
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_b4_commutative:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, r5, r7, lr}
|
|
; V7A-T-NEXT: push {r4, r5, r7, lr}
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: ldr.w r12, [sp, #16]
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r5, r0, r3
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: subs.w lr, r12, #32
|
|
; V7A-T-NEXT: lsl.w r0, r3, r12
|
|
; V7A-T-NEXT: itt pl
|
|
; V7A-T-NEXT: lslpl.w r3, r3, lr
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: subs.w r4, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r5, r1, r4
|
|
; V7A-T-NEXT: lsr.w r1, r1, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: bic.w r0, r5, r0
|
|
; V7A-T-NEXT: bics r1, r3
|
|
; V7A-T-NEXT: pop {r4, r5, r7, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_b4_commutative:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: mov r5, r1
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: ldr r2, [sp, #16]
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r4, r0
|
|
; V6M-NEXT: bics r5, r1
|
|
; V6M-NEXT: mov r0, r4
|
|
; V6M-NEXT: mov r1, r5
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%notmask = shl i64 -1, %numlowbits
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %shifted, %mask ; swapped order
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit, but with 32-bit output
|
|
|
|
; Everything done in 64-bit, truncation happens last.
|
|
; Bit-field extract from a 64-bit value, returning the low 32 bits:
;   trunc((%val >> %numskipbits) & ~(-1 << zext(%numlowbits)))
; %numlowbits is an i8 that the IR zero-extends to i64; the shift, the mask
; (shl nsw i64 -1, then xor with -1) and the 'and' are all i64, and the trunc
; to i32 is the last operation.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_b0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: ldrb.w r1, [sp]
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: lsls r2, r1
|
|
; V7M-NEXT: subs r1, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: bics r0, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_32_b0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: ldrb r12, [sp]
|
|
; V7A-NEXT: subs r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: lsl r1, r1, r12
|
|
; V7A-NEXT: subs r2, r12, #32
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_b0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: lsr.w r12, r0, r2
|
|
; V7A-T-NEXT: rsb.w r0, r2, #32
|
|
; V7A-T-NEXT: ldrb.w r3, [sp]
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: lsl.w r0, r1, r0
|
|
; V7A-T-NEXT: orr.w r0, r0, r12
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: mov.w r1, #-1
|
|
; V7A-T-NEXT: lsls r1, r3
|
|
; V7A-T-NEXT: subs.w r2, r3, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: bics r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_32_b0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: add r1, sp, #8
|
|
; V6M-NEXT: ldrb r2, [r1]
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r4, r0
|
|
; V6M-NEXT: mov r0, r4
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%shiftedval = lshr i64 %val, %numskipbits
|
|
%widenumlowbits = zext i8 %numlowbits to i64
|
|
%notmask = shl nsw i64 -1, %widenumlowbits
|
|
%mask = xor i64 %notmask, -1
|
|
%wideres = and i64 %shiftedval, %mask
|
|
%res = trunc i64 %wideres to i32
|
|
ret i32 %res
|
|
}
|
|
|
|
; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
|
|
; Bit-field extract where only the shift is 64-bit:
;   trunc(%val >> %numskipbits) & ~(-1 << zext(%numlowbits))
; The i64 lshr result is truncated to i32 first; %numlowbits is an i8 that the
; IR zero-extends to i32, and the mask (shl nsw i32 -1, then xor with -1) is
; built and applied entirely in i32.
; The per-target assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_b1:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: ldrb.w r1, [sp]
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: lsl.w r1, r2, r1
|
|
; V7M-NEXT: bics r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_32_b1:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: ldrb r12, [sp]
|
|
; V7A-NEXT: subs r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: mvn r1, #0
|
|
; V7A-NEXT: bic r0, r0, r1, lsl r12
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_b1:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: ldrb.w r12, [sp]
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: mov.w r1, #-1
|
|
; V7A-T-NEXT: lsl.w r1, r1, r12
|
|
; V7A-T-NEXT: bics r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_32_b1:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r7, lr}
|
|
; V6M-NEXT: push {r7, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: add r1, sp, #8
|
|
; V6M-NEXT: ldrb r1, [r1]
|
|
; V6M-NEXT: movs r2, #0
|
|
; V6M-NEXT: mvns r2, r2
|
|
; V6M-NEXT: lsls r2, r1
|
|
; V6M-NEXT: bics r0, r2
|
|
; V6M-NEXT: pop {r7, pc}
|
|
%shiftedval = lshr i64 %val, %numskipbits
|
|
%truncshiftedval = trunc i64 %shiftedval to i32
|
|
%widenumlowbits = zext i8 %numlowbits to i32
|
|
%notmask = shl nsw i32 -1, %widenumlowbits
|
|
%mask = xor i32 %notmask, -1
|
|
%res = and i32 %truncshiftedval, %mask
|
|
ret i32 %res
|
|
}
|
|
|
|
; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
; Masking is 64-bit. Then truncation.
; Computes: trunc((%val >> %numskipbits) & zext(~(-1 << zext(%numlowbits)))).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
; V7M-LABEL: bextr64_32_b2:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r2
; V7M-NEXT: ldrb.w r1, [sp]
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_32_b2:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r3, r2, #32
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: ldrb r12, [sp]
; V7A-NEXT: subs r2, r2, #32
; V7A-NEXT: orr r0, r0, r1, lsl r3
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: mvn r1, #0
; V7A-NEXT: bic r0, r0, r1, lsl r12
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr64_32_b2:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: ldrb.w r12, [sp]
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: lsl.w r3, r1, r3
; V7A-T-NEXT: orr.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: mov.w r1, #-1
; V7A-T-NEXT: lsl.w r1, r1, r12
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr64_32_b2:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r7, lr}
; V6M-NEXT: push {r7, lr}
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: add r1, sp, #8
; V6M-NEXT: ldrb r1, [r1]
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: pop {r7, pc}
  %shiftedval = lshr i64 %val, %numskipbits
  %widenumlowbits = zext i8 %numlowbits to i32
  %notmask = shl nsw i32 -1, %widenumlowbits
  %mask = xor i32 %notmask, -1
  %zextmask = zext i32 %mask to i64
  %wideres = and i64 %shiftedval, %zextmask
  %res = trunc i64 %wideres to i32
  ret i32 %res
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern c. 32-bit
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; Base 32-bit case: (%val >> %numskipbits) & (-1 >> (32 - %numlowbits)).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_c0:
; V7M: @ %bb.0:
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_c0:
; V7A: @ %bb.0:
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: rsb r1, r2, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_c0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: rsb.w r1, r2, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_c0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r3, #32
; V6M-NEXT: subs r2, r3, r2
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: lsls r0, r2
; V6M-NEXT: lsrs r0, r2
; V6M-NEXT: bx lr
  %shifted = lshr i32 %val, %numskipbits
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Same extraction as the base 32-bit case, but both bit counts arrive as i8:
; the skip count is zero-extended before the shift, and 32 - %numlowbits is
; computed in i8 and then zero-extended to form the mask shift amount.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; V7M-LABEL: bextr32_c1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_c1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: rsb r1, r2, #32
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_c1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: rsb.w r1, r2, #32
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_c1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: movs r1, #32
; V6M-NEXT: subs r1, r1, r2
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
  %skip = zext i8 %numskipbits to i32
  %shifted = lshr i32 %val, %skip
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Same extraction as the base 32-bit case, but the value is loaded from %w.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_c2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_c2_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: rsb r1, r2, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_c2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: rsb.w r1, r2, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_c2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r3, #32
; V6M-NEXT: subs r2, r3, r2
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: lsls r0, r2
; V6M-NEXT: lsrs r0, r2
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %shifted = lshr i32 %val, %numskipbits
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Combines the load and i8-index variants: the value is loaded from %w, the
; skip count is zero-extended from i8, and 32 - %numlowbits is computed in i8
; before being zero-extended for the mask shift.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
; V7M-LABEL: bextr32_c3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_c3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: rsb r1, r2, #32
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_c3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: rsb.w r1, r2, #32
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_c3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: movs r1, #32
; V6M-NEXT: subs r1, r1, r2
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %skip = zext i8 %numskipbits to i32
  %shifted = lshr i32 %val, %skip
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %shifted
  ret i32 %masked
}
|
|
|
|
; Same extraction as the base 32-bit case, with the operands of the final
; `and` swapped (shifted value first, mask second).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr32_c4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr32_c4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: rsb r1, r2, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr32_c4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: rsb.w r1, r2, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr32_c4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: movs r3, #32
; V6M-NEXT: subs r2, r3, r2
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: lsls r0, r2
; V6M-NEXT: lsrs r0, r2
; V6M-NEXT: bx lr
  %shifted = lshr i32 %val, %numskipbits
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %shifted, %mask ; swapped order
  ret i32 %masked
}
|
|
|
|
; 64-bit
|
|
|
|
; Base 64-bit case: (%val >> %numskipbits) & (-1 >> (64 - %numlowbits)).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_c0:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: ldr.w r12, [sp]
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: orrs r0, r3
; V7M-NEXT: subs.w r3, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r3
; V7M-NEXT: rsb.w r3, r12, #64
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: lsr.w r3, r2, r3
; V7M-NEXT: rsbs.w r12, r12, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r2, r2, r12
; V7M-NEXT: ands r1, r3
; V7M-NEXT: ands r0, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_c0:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r11, lr}
; V7A-NEXT: push {r4, r5, r11, lr}
; V7A-NEXT: ldr r12, [sp, #16]
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsr r5, r1, r2
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: rsb r4, r12, #64
; V7A-NEXT: rsbs lr, r12, #32
; V7A-NEXT: lsr r4, r3, r4
; V7A-NEXT: lsrpl r3, r3, lr
; V7A-NEXT: movwpl r4, #0
; V7A-NEXT: subs lr, r2, #32
; V7A-NEXT: rsb r2, r2, #32
; V7A-NEXT: movwpl r5, #0
; V7A-NEXT: and r12, r4, r5
; V7A-NEXT: orr r0, r0, r1, lsl r2
; V7A-NEXT: lsrpl r0, r1, lr
; V7A-NEXT: mov r1, r12
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: pop {r4, r5, r11, pc}
;
; V7A-T-LABEL: bextr64_c0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: ldr.w r12, [sp, #8]
; V7A-T-NEXT: mov.w lr, #-1
; V7A-T-NEXT: lsl.w r3, r1, r3
; V7A-T-NEXT: orrs r0, r3
; V7A-T-NEXT: subs.w r3, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r3
; V7A-T-NEXT: lsr.w r1, r1, r2
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: rsbs.w r2, r12, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r3, r2
; V7A-T-NEXT: rsb.w r2, r12, #64
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: lsr.w r2, lr, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: ands r1, r2
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bextr64_c0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: mov r5, r0
; V6M-NEXT: mov r4, r1
; V6M-NEXT: ldr r0, [sp, #16]
; V6M-NEXT: movs r1, #64
; V6M-NEXT: subs r2, r1, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
  %shifted = lshr i64 %val, %numskipbits
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Same extraction as the base 64-bit case, but both bit counts arrive as i8:
; the skip count is zero-extended before the shift, and 64 - %numlowbits is
; computed in i8 and then zero-extended to form the mask shift amount.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; V7M-LABEL: bextr64_c1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: uxtb r2, r2
; V7M-NEXT: lsr.w r12, r0, r2
; V7M-NEXT: rsb.w r0, r2, #32
; V7M-NEXT: lsl.w r0, r1, r0
; V7M-NEXT: orr.w r12, r12, r0
; V7M-NEXT: subs.w r0, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r12, r1, r0
; V7M-NEXT: rsb.w r0, r3, #64
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: uxtb r0, r0
; V7M-NEXT: subs.w lr, r0, #32
; V7M-NEXT: lsr.w r2, r3, r0
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r3, r3, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: and.w r0, r3, r12
; V7M-NEXT: ands r1, r2
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bextr64_c1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, lr}
; V7A-NEXT: push {r4, lr}
; V7A-NEXT: uxtb r12, r2
; V7A-NEXT: lsr lr, r0, r12
; V7A-NEXT: rsb r0, r12, #32
; V7A-NEXT: orr r4, lr, r1, lsl r0
; V7A-NEXT: mvn lr, #31
; V7A-NEXT: uxtab r2, lr, r2
; V7A-NEXT: cmp r2, #0
; V7A-NEXT: lsrpl r4, r1, r2
; V7A-NEXT: rsb r2, r3, #64
; V7A-NEXT: lsr r1, r1, r12
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: uxtb r12, r2
; V7A-NEXT: uxtab r2, lr, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: lsr r0, r3, r12
; V7A-NEXT: cmp r2, #0
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: and r1, r0, r1
; V7A-NEXT: lsrpl r3, r3, r2
; V7A-NEXT: and r0, r3, r4
; V7A-NEXT: pop {r4, pc}
;
; V7A-T-LABEL: bextr64_c1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: uxtb.w r12, r2
; V7A-T-NEXT: lsr.w lr, r0, r12
; V7A-T-NEXT: rsb.w r0, r12, #32
; V7A-T-NEXT: lsl.w r0, r1, r0
; V7A-T-NEXT: orr.w r4, lr, r0
; V7A-T-NEXT: mvn lr, #31
; V7A-T-NEXT: uxtab r2, lr, r2
; V7A-T-NEXT: cmp r2, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r4, r1, r2
; V7A-T-NEXT: rsb.w r2, r3, #64
; V7A-T-NEXT: lsr.w r1, r1, r12
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: uxtb.w r12, r2
; V7A-T-NEXT: uxtab r2, lr, r2
; V7A-T-NEXT: lsr.w r0, r3, r12
; V7A-T-NEXT: cmp r2, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: and.w r1, r1, r0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r3, r2
; V7A-T-NEXT: and.w r0, r3, r4
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bextr64_c1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r3
; V6M-NEXT: uxtb r2, r2
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: mov r6, r0
; V6M-NEXT: mov r4, r1
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r0, r0, r5
; V6M-NEXT: uxtb r2, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r6
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
  %skip = zext i8 %numskipbits to i64
  %shifted = lshr i64 %val, %skip
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %mask = lshr i64 -1, %sh_prom
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Same extraction as the base 64-bit case, but the value is loaded from %w.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_c2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: ldr.w r12, [sp]
; V7M-NEXT: lsl.w r1, r3, r1
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: orrs r0, r1
; V7M-NEXT: subs.w r1, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r3, r1
; V7M-NEXT: lsr.w r1, r3, r2
; V7M-NEXT: rsb.w r3, r12, #64
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: rsbs.w r12, r12, #32
; V7M-NEXT: lsr.w r3, r2, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r2, r2, r12
; V7M-NEXT: ands r1, r3
; V7M-NEXT: ands r0, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_c2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r8, lr}
; V7A-NEXT: push {r4, r6, r8, lr}
; V7A-NEXT: ldr r12, [sp, #16]
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: rsb r6, r12, #64
; V7A-NEXT: ldr r8, [r0]
; V7A-NEXT: mvn r0, #0
; V7A-NEXT: rsbs r1, r12, #32
; V7A-NEXT: lsr r6, r0, r6
; V7A-NEXT: lsr r4, r3, r2
; V7A-NEXT: lsrpl r0, r0, r1
; V7A-NEXT: movwpl r6, #0
; V7A-NEXT: subs r12, r2, #32
; V7A-NEXT: movwpl r4, #0
; V7A-NEXT: and r1, r6, r4
; V7A-NEXT: lsr r6, r8, r2
; V7A-NEXT: rsb r2, r2, #32
; V7A-NEXT: orr r2, r6, r3, lsl r2
; V7A-NEXT: lsrpl r2, r3, r12
; V7A-NEXT: and r0, r0, r2
; V7A-NEXT: pop {r4, r6, r8, pc}
;
; V7A-T-LABEL: bextr64_c2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldrd r0, r3, [r0]
; V7A-T-NEXT: rsb.w r1, r2, #32
; V7A-T-NEXT: ldr.w r12, [sp]
; V7A-T-NEXT: lsl.w r1, r3, r1
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: orrs r0, r1
; V7A-T-NEXT: subs.w r1, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r3, r1
; V7A-T-NEXT: lsr.w r1, r3, r2
; V7A-T-NEXT: rsb.w r2, r12, #64
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: rsbs.w r12, r12, #32
; V7A-T-NEXT: lsr.w r2, r3, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r3, r3, r12
; V7A-T-NEXT: ands r1, r2
; V7A-T-NEXT: ands r0, r3
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr64_c2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: ldr r3, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: mov r0, r3
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: mov r5, r0
; V6M-NEXT: mov r4, r1
; V6M-NEXT: ldr r0, [sp, #16]
; V6M-NEXT: movs r1, #64
; V6M-NEXT: subs r2, r1, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
  %val = load i64, ptr %w
  %shifted = lshr i64 %val, %numskipbits
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Combines the load and i8-index variants: the value is loaded from %w, the
; skip count is zero-extended from i8, and 64 - %numlowbits is computed in i8
; before being zero-extended for the mask shift.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
; V7M-LABEL: bextr64_c3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsr.w r12, r0, r1
; V7M-NEXT: rsb.w r0, r1, #32
; V7M-NEXT: lsl.w r0, r3, r0
; V7M-NEXT: orr.w r12, r12, r0
; V7M-NEXT: subs.w r0, r1, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r12, r3, r0
; V7M-NEXT: rsb.w r0, r2, #64
; V7M-NEXT: lsr.w r1, r3, r1
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: uxtb r0, r0
; V7M-NEXT: subs.w lr, r0, #32
; V7M-NEXT: lsr.w r2, r3, r0
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r3, r3, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: and.w r0, r3, r12
; V7M-NEXT: ands r1, r2
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bextr64_c3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, lr}
; V7A-NEXT: push {r4, lr}
; V7A-NEXT: ldr r4, [r0]
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: uxtb r0, r1
; V7A-NEXT: lsr r12, r4, r0
; V7A-NEXT: rsb r4, r0, #32
; V7A-NEXT: lsr r0, r3, r0
; V7A-NEXT: orr lr, r12, r3, lsl r4
; V7A-NEXT: mvn r12, #31
; V7A-NEXT: uxtab r1, r12, r1
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: lsrpl lr, r3, r1
; V7A-NEXT: rsb r1, r2, #64
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: uxtb r2, r1
; V7A-NEXT: uxtab r4, r12, r1
; V7A-NEXT: lsr r2, r3, r2
; V7A-NEXT: cmp r4, #0
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: and r1, r2, r0
; V7A-NEXT: lsrpl r3, r3, r4
; V7A-NEXT: and r0, r3, lr
; V7A-NEXT: pop {r4, pc}
;
; V7A-T-LABEL: bextr64_c3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, r5, r7, lr}
; V7A-T-NEXT: push {r4, r5, r7, lr}
; V7A-T-NEXT: ldrd r12, lr, [r0]
; V7A-T-NEXT: uxtb r0, r1
; V7A-T-NEXT: rsb.w r3, r0, #32
; V7A-T-NEXT: lsl.w r4, lr, r3
; V7A-T-NEXT: lsr.w r3, r12, r0
; V7A-T-NEXT: orr.w r5, r3, r4
; V7A-T-NEXT: mvn r12, #31
; V7A-T-NEXT: uxtab r1, r12, r1
; V7A-T-NEXT: lsr.w r0, lr, r0
; V7A-T-NEXT: cmp r1, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r5, lr, r1
; V7A-T-NEXT: rsb.w r1, r2, #64
; V7A-T-NEXT: mov.w r4, #-1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: uxtb r2, r1
; V7A-T-NEXT: uxtab r3, r12, r1
; V7A-T-NEXT: lsr.w r2, r4, r2
; V7A-T-NEXT: cmp r3, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: and.w r1, r2, r0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r4, r3
; V7A-T-NEXT: and.w r0, r4, r5
; V7A-T-NEXT: pop {r4, r5, r7, pc}
;
; V6M-LABEL: bextr64_c3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r2
; V6M-NEXT: ldr r4, [r0]
; V6M-NEXT: ldr r3, [r0, #4]
; V6M-NEXT: uxtb r2, r1
; V6M-NEXT: mov r0, r4
; V6M-NEXT: mov r1, r3
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: mov r6, r0
; V6M-NEXT: mov r4, r1
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r0, r0, r5
; V6M-NEXT: uxtb r2, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r6
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
  %val = load i64, ptr %w
  %skip = zext i8 %numskipbits to i64
  %shifted = lshr i64 %val, %skip
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %mask = lshr i64 -1, %sh_prom
  %masked = and i64 %mask, %shifted
  ret i64 %masked
}
|
|
|
|
; Same extraction as the base 64-bit case, with the operands of the final
; `and` swapped (shifted value first, mask second).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_c4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: ldr.w r12, [sp]
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: orrs r0, r3
; V7M-NEXT: subs.w r3, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r3
; V7M-NEXT: rsb.w r3, r12, #64
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: lsr.w r3, r2, r3
; V7M-NEXT: rsbs.w r12, r12, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r2, r2, r12
; V7M-NEXT: ands r1, r3
; V7M-NEXT: ands r0, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_c4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r5, r11, lr}
; V7A-NEXT: push {r4, r5, r11, lr}
; V7A-NEXT: ldr r12, [sp, #16]
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsr r5, r1, r2
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: rsb r4, r12, #64
; V7A-NEXT: rsbs lr, r12, #32
; V7A-NEXT: lsr r4, r3, r4
; V7A-NEXT: lsrpl r3, r3, lr
; V7A-NEXT: movwpl r4, #0
; V7A-NEXT: subs lr, r2, #32
; V7A-NEXT: rsb r2, r2, #32
; V7A-NEXT: movwpl r5, #0
; V7A-NEXT: and r12, r5, r4
; V7A-NEXT: orr r0, r0, r1, lsl r2
; V7A-NEXT: lsrpl r0, r1, lr
; V7A-NEXT: mov r1, r12
; V7A-NEXT: and r0, r0, r3
; V7A-NEXT: pop {r4, r5, r11, pc}
;
; V7A-T-LABEL: bextr64_c4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: ldr.w r12, [sp, #8]
; V7A-T-NEXT: mov.w lr, #-1
; V7A-T-NEXT: lsl.w r3, r1, r3
; V7A-T-NEXT: orrs r0, r3
; V7A-T-NEXT: subs.w r3, r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r3
; V7A-T-NEXT: lsr.w r1, r1, r2
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: rsbs.w r2, r12, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r3, r2
; V7A-T-NEXT: rsb.w r2, r12, #64
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: lsr.w r2, lr, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: ands r1, r2
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bextr64_c4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: mov r5, r0
; V6M-NEXT: mov r4, r1
; V6M-NEXT: ldr r0, [sp, #16]
; V6M-NEXT: movs r1, #64
; V6M-NEXT: subs r2, r1, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
  %shifted = lshr i64 %val, %numskipbits
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %shifted, %mask ; swapped order
  ret i64 %masked
}
|
|
|
|
; 64-bit, but with 32-bit output
|
|
|
|
; Everything done in 64-bit, truncation happens last.
; Computes: trunc((%val >> %numskipbits) & (-1 >> (64 - %numlowbits))).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; V7M-LABEL: bextr64_32_c0:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r2
; V7M-NEXT: ldr r1, [sp]
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: rsbs.w r1, r1, #32
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl r2, r1
; V7M-NEXT: ands r0, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_32_c0:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r3, [sp]
; V7A-NEXT: rsbs r12, r3, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsrpl r3, r3, r12
; V7A-NEXT: lsr r12, r0, r2
; V7A-NEXT: rsb r0, r2, #32
; V7A-NEXT: subs r2, r2, #32
; V7A-NEXT: orr r0, r12, r1, lsl r0
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr64_32_c0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: ldr.w r12, [sp]
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: lsl.w r3, r1, r3
; V7A-T-NEXT: orr.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: rsbs.w r1, r12, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r2, r1
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr64_32_c0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: mov r4, r0
; V6M-NEXT: ldr r0, [sp, #8]
; V6M-NEXT: movs r1, #64
; V6M-NEXT: subs r2, r1, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, pc}
  %shifted = lshr i64 %val, %numskipbits
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %mask, %shifted
  %res = trunc i64 %masked to i32
  ret i32 %res
}
|
|
|
|
; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
; Computes: trunc(%val >> %numskipbits) & (-1 >> (32 - %numlowbits)).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr64_32_c1:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r2
; V7M-NEXT: ldr r1, [sp]
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_32_c1:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r3, r2, #32
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: ldr r12, [sp]
; V7A-NEXT: subs r2, r2, #32
; V7A-NEXT: orr r0, r0, r1, lsl r3
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: rsb r1, r12, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr64_32_c1:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: ldr.w r12, [sp]
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: lsl.w r3, r1, r3
; V7A-T-NEXT: orr.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: rsb.w r1, r12, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr64_32_c1:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r7, lr}
; V6M-NEXT: push {r7, lr}
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ldr r1, [sp, #8]
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: pop {r7, pc}
  %shifted = lshr i64 %val, %numskipbits
  %truncshifted = trunc i64 %shifted to i32
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %truncshifted
  ret i32 %masked
}
|
|
|
|
; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
; Masking is 64-bit. Then truncation.
; Computes: trunc(zext(-1 >> (32 - %numlowbits)) & (%val >> %numskipbits)).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
; V7M-LABEL: bextr64_32_c2:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsrs r0, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r2
; V7M-NEXT: ldr r1, [sp]
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bextr64_32_c2:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r3, r2, #32
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: ldr r12, [sp]
; V7A-NEXT: subs r2, r2, #32
; V7A-NEXT: orr r0, r0, r1, lsl r3
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: rsb r1, r12, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bextr64_32_c2:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: lsrs r0, r2
; V7A-T-NEXT: ldr.w r12, [sp]
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: lsl.w r3, r1, r3
; V7A-T-NEXT: orr.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: rsb.w r1, r12, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bextr64_32_c2:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r7, lr}
; V6M-NEXT: push {r7, lr}
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ldr r1, [sp, #8]
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: pop {r7, pc}
  %shifted = lshr i64 %val, %numskipbits
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %zextmask = zext i32 %mask to i64
  %masked = and i64 %zextmask, %shifted
  %truncmasked = trunc i64 %masked to i32
  ret i32 %truncmasked
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern d. 32-bit.
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; Computes ((%val >>u %numskipbits) << (32 - %numlowbits)) >>u (32 - %numlowbits).
; No explicit mask is built: the shl/lshr pair by the same amount clears the
; top (32 - %numlowbits) bits of the shifted value.
define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_d0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: rsb.w r1, r2, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_d0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: rsb r1, r2, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_d0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: rsb.w r1, r2, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_d0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r3, #32
|
|
; V6M-NEXT: subs r2, r3, r2
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: lsls r0, r2
|
|
; V6M-NEXT: lsrs r0, r2
|
|
; V6M-NEXT: bx lr
|
|
%shifted = lshr i32 %val, %numskipbits ; drop the skipped low bits
|
|
%numhighbits = sub i32 32, %numlowbits ; number of high bits to clear
|
|
%highbitscleared = shl i32 %shifted, %numhighbits ; push the unwanted high bits out
|
|
%masked = lshr i32 %highbitscleared, %numhighbits ; move the field back down, zero-filled
|
|
ret i32 %masked
|
|
}
|
|
|
|
; Same shl/lshr extraction as a plain 32-bit shift pair, but both counts arrive
; as i8. %numskipbits is zero-extended to i32 before the first shift, and
; (32 - %numlowbits) is computed in i8 and only then zero-extended to i32.
define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_d1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: rsb.w r1, r2, #32
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_d1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: rsb r1, r2, #32
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_d1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: rsb.w r1, r2, #32
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_d1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: movs r1, #32
|
|
; V6M-NEXT: subs r1, r1, r2
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%skip = zext i8 %numskipbits to i32 ; widen the skip count to the shift width
|
|
%shifted = lshr i32 %val, %skip
|
|
%numhighbits = sub i8 32, %numlowbits ; computed in i8, before widening
|
|
%sh_prom = zext i8 %numhighbits to i32
|
|
%highbitscleared = shl i32 %shifted, %sh_prom
|
|
%masked = lshr i32 %highbitscleared, %sh_prom
|
|
ret i32 %masked
|
|
}
|
|
|
|
; Loads an i32 from %w, shifts it right by %numskipbits, then clears the top
; (32 - %numlowbits) bits with a shl/lshr pair by the same amount.
define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_d2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: rsb.w r1, r2, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_d2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: rsb r1, r2, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_d2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: rsb.w r1, r2, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_d2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r3, #32
|
|
; V6M-NEXT: subs r2, r3, r2
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: lsls r0, r2
|
|
; V6M-NEXT: lsrs r0, r2
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w ; the value comes from memory rather than an argument
|
|
%shifted = lshr i32 %val, %numskipbits
|
|
%numhighbits = sub i32 32, %numlowbits ; number of high bits to clear
|
|
%highbitscleared = shl i32 %shifted, %numhighbits
|
|
%masked = lshr i32 %highbitscleared, %numhighbits
|
|
ret i32 %masked
|
|
}
|
|
|
|
; Loads an i32 from %w and extracts a field with a shl/lshr pair, taking both
; counts as i8. %numskipbits is zero-extended to i32; (32 - %numlowbits) is
; computed in i8 and then zero-extended to i32.
define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr32_d3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: rsb.w r1, r2, #32
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr32_d3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: rsb r1, r2, #32
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr32_d3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: rsb.w r1, r2, #32
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr32_d3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: movs r1, #32
|
|
; V6M-NEXT: subs r1, r1, r2
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w ; the value comes from memory rather than an argument
|
|
%skip = zext i8 %numskipbits to i32 ; widen the skip count to the shift width
|
|
%shifted = lshr i32 %val, %skip
|
|
%numhighbits = sub i8 32, %numlowbits ; computed in i8, before widening
|
|
%sh_prom = zext i8 %numhighbits to i32
|
|
%highbitscleared = shl i32 %shifted, %sh_prom
|
|
%masked = lshr i32 %highbitscleared, %sh_prom
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 64-bit.
|
|
|
|
; Computes ((%val >>u %numskipbits) << (64 - %numlowbits)) >>u (64 - %numlowbits)
; entirely in i64. The shl/lshr pair by the same amount clears the top
; (64 - %numlowbits) bits of the shifted value.
define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_d0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r4, lr}
|
|
; V7M-NEXT: push {r4, lr}
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: ldr.w r12, [sp, #8]
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orrs r0, r3
|
|
; V7M-NEXT: subs.w r3, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r3
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: rsb.w r3, r12, #64
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: rsb.w lr, r12, #32
|
|
; V7M-NEXT: rsb.w r12, r3, #32
|
|
; V7M-NEXT: lsls r1, r3
|
|
; V7M-NEXT: cmp.w lr, #0
|
|
; V7M-NEXT: lsr.w r4, r0, r12
|
|
; V7M-NEXT: orr.w r1, r1, r4
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r1, r0, lr
|
|
; V7M-NEXT: lsl.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsl.w r2, r1, r12
|
|
; V7M-NEXT: lsr.w r0, r0, r3
|
|
; V7M-NEXT: orr.w r0, r0, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, lr
|
|
; V7M-NEXT: lsr.w r1, r1, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: pop {r4, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_d0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: lsr r3, r1, r2
|
|
; V7A-NEXT: subs lr, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: rsb r2, r2, #32
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: movwpl r3, #0
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r2
|
|
; V7A-NEXT: lsrpl r0, r1, lr
|
|
; V7A-NEXT: rsb r1, r12, #64
|
|
; V7A-NEXT: rsb lr, r1, #32
|
|
; V7A-NEXT: lsr r2, r0, lr
|
|
; V7A-NEXT: orr r2, r2, r3, lsl r1
|
|
; V7A-NEXT: rsbs r3, r12, #32
|
|
; V7A-NEXT: lslpl r2, r0, r3
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: lsr r1, r2, r1
|
|
; V7A-NEXT: orr r0, r0, r2, lsl lr
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: lsrpl r0, r2, r3
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_d0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, lr}
|
|
; V7A-T-NEXT: push {r4, lr}
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: ldr.w r12, [sp, #8]
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orrs r0, r3
|
|
; V7A-T-NEXT: subs.w r3, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r3
|
|
; V7A-T-NEXT: lsr.w r1, r1, r2
|
|
; V7A-T-NEXT: rsb.w r3, r12, #64
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: rsb.w lr, r3, #32
|
|
; V7A-T-NEXT: lsls r1, r3
|
|
; V7A-T-NEXT: rsbs.w r2, r12, #32
|
|
; V7A-T-NEXT: lsr.w r4, r0, lr
|
|
; V7A-T-NEXT: orr.w r1, r1, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r1, r0, r2
|
|
; V7A-T-NEXT: lsl.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsl.w r4, r1, lr
|
|
; V7A-T-NEXT: lsr.w r0, r0, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: lsr.w r1, r1, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: pop {r4, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_d0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldr r2, [sp, #8]
|
|
; V6M-NEXT: movs r3, #64
|
|
; V6M-NEXT: subs r4, r3, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%shifted = lshr i64 %val, %numskipbits ; drop the skipped low bits
|
|
%numhighbits = sub i64 64, %numlowbits ; number of high bits to clear
|
|
%highbitscleared = shl i64 %shifted, %numhighbits ; push the unwanted high bits out
|
|
%masked = lshr i64 %highbitscleared, %numhighbits ; move the field back down, zero-filled
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit shl/lshr field extraction with both counts supplied as i8.
; %numskipbits is zero-extended to i64 before the first shift, and
; (64 - %numlowbits) is computed in i8 and only then zero-extended to i64.
define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_d1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r4, lr}
|
|
; V7M-NEXT: push {r4, lr}
|
|
; V7M-NEXT: uxtb.w lr, r2
|
|
; V7M-NEXT: subs.w r2, lr, #32
|
|
; V7M-NEXT: lsr.w r12, r0, lr
|
|
; V7M-NEXT: rsb.w r0, lr, #32
|
|
; V7M-NEXT: lsl.w r0, r1, r0
|
|
; V7M-NEXT: orr.w r0, r0, r12
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: rsb.w r2, r3, #64
|
|
; V7M-NEXT: lsr.w r1, r1, lr
|
|
; V7M-NEXT: uxtb r2, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsls r1, r2
|
|
; V7M-NEXT: sub.w r12, r2, #32
|
|
; V7M-NEXT: lsr.w r4, r0, r3
|
|
; V7M-NEXT: orrs r1, r4
|
|
; V7M-NEXT: cmp.w r12, #0
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r1, r0, r12
|
|
; V7M-NEXT: lsl.w r0, r0, r2
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsr.w r0, r0, r2
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r12
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: pop {r4, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_d1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r4, r5, r11, lr}
|
|
; V7A-NEXT: push {r4, r5, r11, lr}
|
|
; V7A-NEXT: uxtb r12, r2
|
|
; V7A-NEXT: lsr lr, r0, r12
|
|
; V7A-NEXT: rsb r0, r12, #32
|
|
; V7A-NEXT: orr r0, lr, r1, lsl r0
|
|
; V7A-NEXT: mvn lr, #31
|
|
; V7A-NEXT: uxtab r2, lr, r2
|
|
; V7A-NEXT: cmp r2, #0
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: rsb r2, r3, #64
|
|
; V7A-NEXT: lsr r1, r1, r12
|
|
; V7A-NEXT: uxtb r3, r2
|
|
; V7A-NEXT: rsb r4, r3, #32
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: uxtab r2, lr, r2
|
|
; V7A-NEXT: lsr r5, r0, r4
|
|
; V7A-NEXT: orr r1, r5, r1, lsl r3
|
|
; V7A-NEXT: cmp r2, #0
|
|
; V7A-NEXT: lslpl r1, r0, r2
|
|
; V7A-NEXT: lsl r0, r0, r3
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lsr r0, r0, r3
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r4
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: lsr r1, r1, r3
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: pop {r4, r5, r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_d1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, r5, r6, r7, lr}
|
|
; V7A-T-NEXT: push {r4, r5, r6, r7, lr}
|
|
; V7A-T-NEXT: uxtb.w r12, r2
|
|
; V7A-T-NEXT: rsb.w r6, r12, #32
|
|
; V7A-T-NEXT: rsb.w r3, r3, #64
|
|
; V7A-T-NEXT: lsr.w r0, r0, r12
|
|
; V7A-T-NEXT: mvn r7, #31
|
|
; V7A-T-NEXT: uxtab r2, r7, r2
|
|
; V7A-T-NEXT: lsl.w r6, r1, r6
|
|
; V7A-T-NEXT: lsr.w lr, r1, r12
|
|
; V7A-T-NEXT: orrs r0, r6
|
|
; V7A-T-NEXT: cmp r2, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl.w lr, #0
|
|
; V7A-T-NEXT: uxtb r5, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: rsb.w r1, r5, #32
|
|
; V7A-T-NEXT: uxtab r3, r7, r3
|
|
; V7A-T-NEXT: lsl.w r4, lr, r5
|
|
; V7A-T-NEXT: lsr.w r2, r0, r1
|
|
; V7A-T-NEXT: cmp r3, #0
|
|
; V7A-T-NEXT: orr.w r2, r2, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r2, r0, r3
|
|
; V7A-T-NEXT: lsl.w r0, r0, r5
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsl.w r1, r2, r1
|
|
; V7A-T-NEXT: lsr.w r0, r0, r5
|
|
; V7A-T-NEXT: orr.w r0, r0, r1
|
|
; V7A-T-NEXT: lsr.w r1, r2, r5
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r2, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: pop {r4, r5, r6, r7, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_d1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: mov r4, r3
|
|
; V6M-NEXT: uxtb r2, r2
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: movs r2, #64
|
|
; V6M-NEXT: subs r2, r2, r4
|
|
; V6M-NEXT: uxtb r4, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%skip = zext i8 %numskipbits to i64 ; widen the skip count to the shift width
|
|
%shifted = lshr i64 %val, %skip
|
|
%numhighbits = sub i8 64, %numlowbits ; computed in i8, before widening
|
|
%sh_prom = zext i8 %numhighbits to i64
|
|
%highbitscleared = shl i64 %shifted, %sh_prom
|
|
%masked = lshr i64 %highbitscleared, %sh_prom
|
|
ret i64 %masked
|
|
}
|
|
|
|
; Loads an i64 from %w, shifts it right by %numskipbits, then clears the top
; (64 - %numlowbits) bits with a shl/lshr pair by the same amount.
define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_d2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r4, lr}
|
|
; V7M-NEXT: push {r4, lr}
|
|
; V7M-NEXT: ldrd r0, r3, [r0]
|
|
; V7M-NEXT: rsb.w r1, r2, #32
|
|
; V7M-NEXT: ldr.w r12, [sp, #8]
|
|
; V7M-NEXT: lsl.w r1, r3, r1
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: rsb.w lr, r12, #32
|
|
; V7M-NEXT: orrs r0, r1
|
|
; V7M-NEXT: subs.w r1, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r3, r1
|
|
; V7M-NEXT: rsb.w r1, r12, #64
|
|
; V7M-NEXT: lsr.w r2, r3, r2
|
|
; V7M-NEXT: rsb.w r12, r1, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: cmp.w lr, #0
|
|
; V7M-NEXT: lsl.w r2, r2, r1
|
|
; V7M-NEXT: lsr.w r4, r0, r12
|
|
; V7M-NEXT: orr.w r2, r2, r4
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r2, r0, lr
|
|
; V7M-NEXT: lsl.w r0, r0, r1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsl.w r3, r2, r12
|
|
; V7M-NEXT: lsr.w r0, r0, r1
|
|
; V7M-NEXT: lsr.w r1, r2, r1
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r2, lr
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: pop {r4, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_d2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: ldrd r0, r1, [r0]
|
|
; V7A-NEXT: subs lr, r2, #32
|
|
; V7A-NEXT: lsr r3, r1, r2
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: movwpl r3, #0
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: rsb r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r2
|
|
; V7A-NEXT: lsrpl r0, r1, lr
|
|
; V7A-NEXT: rsb r1, r12, #64
|
|
; V7A-NEXT: rsb lr, r1, #32
|
|
; V7A-NEXT: lsr r2, r0, lr
|
|
; V7A-NEXT: orr r2, r2, r3, lsl r1
|
|
; V7A-NEXT: rsbs r3, r12, #32
|
|
; V7A-NEXT: lslpl r2, r0, r3
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: lsr r1, r2, r1
|
|
; V7A-NEXT: orr r0, r0, r2, lsl lr
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: lsrpl r0, r2, r3
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_d2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, lr}
|
|
; V7A-T-NEXT: push {r4, lr}
|
|
; V7A-T-NEXT: ldrd r0, r3, [r0]
|
|
; V7A-T-NEXT: rsb.w r1, r2, #32
|
|
; V7A-T-NEXT: ldr.w r12, [sp, #8]
|
|
; V7A-T-NEXT: lsl.w r1, r3, r1
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: orrs r0, r1
|
|
; V7A-T-NEXT: subs.w r1, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r3, r1
|
|
; V7A-T-NEXT: lsr.w r2, r3, r2
|
|
; V7A-T-NEXT: rsb.w r1, r12, #64
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r2, #0
|
|
; V7A-T-NEXT: rsb.w lr, r1, #32
|
|
; V7A-T-NEXT: rsbs.w r3, r12, #32
|
|
; V7A-T-NEXT: lsl.w r2, r2, r1
|
|
; V7A-T-NEXT: lsr.w r4, r0, lr
|
|
; V7A-T-NEXT: orr.w r2, r2, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r2, r0, r3
|
|
; V7A-T-NEXT: lsl.w r0, r0, r1
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsl.w r4, r2, lr
|
|
; V7A-T-NEXT: lsr.w r0, r0, r1
|
|
; V7A-T-NEXT: lsr.w r1, r2, r1
|
|
; V7A-T-NEXT: orr.w r0, r0, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r2, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: pop {r4, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_d2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: ldr r3, [r0]
|
|
; V6M-NEXT: ldr r1, [r0, #4]
|
|
; V6M-NEXT: mov r0, r3
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldr r2, [sp, #8]
|
|
; V6M-NEXT: movs r3, #64
|
|
; V6M-NEXT: subs r4, r3, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%val = load i64, ptr %w ; the value comes from memory rather than an argument
|
|
%shifted = lshr i64 %val, %numskipbits
|
|
%numhighbits = sub i64 64, %numlowbits ; number of high bits to clear
|
|
%highbitscleared = shl i64 %shifted, %numhighbits
|
|
%masked = lshr i64 %highbitscleared, %numhighbits
|
|
ret i64 %masked
|
|
}
|
|
|
|
; Loads an i64 from %w and extracts a field with a shl/lshr pair, taking both
; counts as i8. %numskipbits is zero-extended to i64; (64 - %numlowbits) is
; computed in i8 and then zero-extended to i64.
define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_d3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r4, lr}
|
|
; V7M-NEXT: push {r4, lr}
|
|
; V7M-NEXT: ldrd r0, lr, [r0]
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: rsb.w r2, r2, #64
|
|
; V7M-NEXT: subs.w r3, r1, #32
|
|
; V7M-NEXT: lsr.w r12, r0, r1
|
|
; V7M-NEXT: rsb.w r0, r1, #32
|
|
; V7M-NEXT: lsr.w r1, lr, r1
|
|
; V7M-NEXT: uxtb r2, r2
|
|
; V7M-NEXT: lsl.w r0, lr, r0
|
|
; V7M-NEXT: orr.w r0, r0, r12
|
|
; V7M-NEXT: sub.w r12, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, lr, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsls r1, r2
|
|
; V7M-NEXT: cmp.w r12, #0
|
|
; V7M-NEXT: lsr.w r4, r0, r3
|
|
; V7M-NEXT: orr.w r1, r1, r4
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r1, r0, r12
|
|
; V7M-NEXT: lsl.w r0, r0, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: lsr.w r0, r0, r2
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r12
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: pop {r4, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_d3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r4, r5, r11, lr}
|
|
; V7A-NEXT: push {r4, r5, r11, lr}
|
|
; V7A-NEXT: ldr r4, [r0]
|
|
; V7A-NEXT: ldr r3, [r0, #4]
|
|
; V7A-NEXT: uxtb r0, r1
|
|
; V7A-NEXT: lsr r12, r4, r0
|
|
; V7A-NEXT: rsb r4, r0, #32
|
|
; V7A-NEXT: lsr r0, r3, r0
|
|
; V7A-NEXT: orr r4, r12, r3, lsl r4
|
|
; V7A-NEXT: mvn r12, #31
|
|
; V7A-NEXT: uxtab r1, r12, r1
|
|
; V7A-NEXT: cmp r1, #0
|
|
; V7A-NEXT: lsrpl r4, r3, r1
|
|
; V7A-NEXT: rsb r1, r2, #64
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: uxtb r2, r1
|
|
; V7A-NEXT: rsb lr, r2, #32
|
|
; V7A-NEXT: uxtab r1, r12, r1
|
|
; V7A-NEXT: lsr r5, r4, lr
|
|
; V7A-NEXT: orr r3, r5, r0, lsl r2
|
|
; V7A-NEXT: cmp r1, #0
|
|
; V7A-NEXT: lsl r0, r4, r2
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lslpl r3, r4, r1
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: orr r0, r0, r3, lsl lr
|
|
; V7A-NEXT: lsrpl r0, r3, r1
|
|
; V7A-NEXT: lsr r1, r3, r2
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: pop {r4, r5, r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_d3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, r5, r6, lr}
|
|
; V7A-T-NEXT: push {r4, r5, r6, lr}
|
|
; V7A-T-NEXT: ldrd r12, lr, [r0]
|
|
; V7A-T-NEXT: uxtb r0, r1
|
|
; V7A-T-NEXT: rsb.w r6, r0, #32
|
|
; V7A-T-NEXT: lsr.w r3, lr, r0
|
|
; V7A-T-NEXT: rsb.w r2, r2, #64
|
|
; V7A-T-NEXT: mvn r4, #31
|
|
; V7A-T-NEXT: lsr.w r0, r12, r0
|
|
; V7A-T-NEXT: uxtab r1, r4, r1
|
|
; V7A-T-NEXT: lsl.w r6, lr, r6
|
|
; V7A-T-NEXT: orrs r0, r6
|
|
; V7A-T-NEXT: cmp r1, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r3, #0
|
|
; V7A-T-NEXT: uxtb r5, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, lr, r1
|
|
; V7A-T-NEXT: rsb.w r1, r5, #32
|
|
; V7A-T-NEXT: lsls r3, r5
|
|
; V7A-T-NEXT: uxtab r2, r4, r2
|
|
; V7A-T-NEXT: lsr.w r6, r0, r1
|
|
; V7A-T-NEXT: orrs r3, r6
|
|
; V7A-T-NEXT: cmp r2, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r3, r0, r2
|
|
; V7A-T-NEXT: lsl.w r0, r0, r5
|
|
; V7A-T-NEXT: lsl.w r1, r3, r1
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsr.w r0, r0, r5
|
|
; V7A-T-NEXT: orr.w r0, r0, r1
|
|
; V7A-T-NEXT: lsr.w r1, r3, r5
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r3, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: pop {r4, r5, r6, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_d3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: mov r4, r2
|
|
; V6M-NEXT: ldr r5, [r0]
|
|
; V6M-NEXT: ldr r3, [r0, #4]
|
|
; V6M-NEXT: uxtb r2, r1
|
|
; V6M-NEXT: mov r0, r5
|
|
; V6M-NEXT: mov r1, r3
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: movs r2, #64
|
|
; V6M-NEXT: subs r2, r2, r4
|
|
; V6M-NEXT: uxtb r4, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%val = load i64, ptr %w ; the value comes from memory rather than an argument
|
|
%skip = zext i8 %numskipbits to i64 ; widen the skip count to the shift width
|
|
%shifted = lshr i64 %val, %skip
|
|
%numhighbits = sub i8 64, %numlowbits ; computed in i8, before widening
|
|
%sh_prom = zext i8 %numhighbits to i64
|
|
%highbitscleared = shl i64 %shifted, %sh_prom
|
|
%masked = lshr i64 %highbitscleared, %sh_prom
|
|
ret i64 %masked
|
|
}
|
|
|
|
; 64-bit, but with 32-bit output
|
|
|
|
; Everything done in 64-bit, truncation happens last.
|
|
; Computes trunc_i32(((%val >>u %numskipbits) << (64 - %numlowbits)) >>u (64 - %numlowbits)).
; All three shifts are performed on i64; only the final result is narrowed.
define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_d0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r4, lr}
|
|
; V7M-NEXT: push {r4, lr}
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: ldr.w r12, [sp, #8]
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orrs r0, r3
|
|
; V7M-NEXT: subs.w r3, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r3
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: rsb.w r3, r12, #64
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: rsb.w lr, r12, #32
|
|
; V7M-NEXT: rsb.w r12, r3, #32
|
|
; V7M-NEXT: lsls r1, r3
|
|
; V7M-NEXT: cmp.w lr, #0
|
|
; V7M-NEXT: lsr.w r4, r0, r12
|
|
; V7M-NEXT: orr.w r1, r1, r4
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r1, r0, lr
|
|
; V7M-NEXT: lsl.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsl.w r2, r1, r12
|
|
; V7M-NEXT: lsr.w r0, r0, r3
|
|
; V7M-NEXT: orr.w r0, r0, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, lr
|
|
; V7M-NEXT: pop {r4, pc}
|
|
;
|
|
; V7A-LABEL: bextr64_32_d0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: lsr r3, r1, r2
|
|
; V7A-NEXT: subs lr, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: rsb r2, r2, #32
|
|
; V7A-NEXT: ldr r12, [sp, #8]
|
|
; V7A-NEXT: movwpl r3, #0
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r2
|
|
; V7A-NEXT: lsrpl r0, r1, lr
|
|
; V7A-NEXT: rsb r1, r12, #64
|
|
; V7A-NEXT: rsb lr, r1, #32
|
|
; V7A-NEXT: lsr r2, r0, lr
|
|
; V7A-NEXT: orr r2, r2, r3, lsl r1
|
|
; V7A-NEXT: rsbs r3, r12, #32
|
|
; V7A-NEXT: lslpl r2, r0, r3
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: orr r0, r0, r2, lsl lr
|
|
; V7A-NEXT: lsrpl r0, r2, r3
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_d0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, lr}
|
|
; V7A-T-NEXT: push {r4, lr}
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: ldr.w r12, [sp, #8]
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orrs r0, r3
|
|
; V7A-T-NEXT: subs.w r3, r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r3
|
|
; V7A-T-NEXT: lsr.w r1, r1, r2
|
|
; V7A-T-NEXT: rsb.w r3, r12, #64
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: rsb.w lr, r3, #32
|
|
; V7A-T-NEXT: lsls r1, r3
|
|
; V7A-T-NEXT: rsbs.w r2, r12, #32
|
|
; V7A-T-NEXT: lsr.w r4, r0, lr
|
|
; V7A-T-NEXT: orr.w r1, r1, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r1, r0, r2
|
|
; V7A-T-NEXT: lsl.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsl.w r4, r1, lr
|
|
; V7A-T-NEXT: lsr.w r0, r0, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: pop {r4, pc}
|
|
;
|
|
; V6M-LABEL: bextr64_32_d0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldr r2, [sp, #8]
|
|
; V6M-NEXT: movs r3, #64
|
|
; V6M-NEXT: subs r4, r3, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%shifted = lshr i64 %val, %numskipbits ; drop the skipped low bits
|
|
%numhighbits = sub i64 64, %numlowbits ; number of high bits to clear
|
|
%highbitscleared = shl i64 %shifted, %numhighbits
|
|
%masked = lshr i64 %highbitscleared, %numhighbits
|
|
%res = trunc i64 %masked to i32 ; narrowing is the very last step
|
|
ret i32 %res
|
|
}
|
|
|
|
; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
|
|
; Computes (trunc_i32(%val >>u %numskipbits) << (32 - %numlowbits)) >>u (32 - %numlowbits).
; Only the first shift is 64-bit; the value is truncated before the shl/lshr
; pair, which therefore operates on i32.
define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bextr64_32_d1:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsrs r0, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: ldr r1, [sp]
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bextr64_32_d1:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r3, r2, #32
|
|
; V7A-NEXT: lsr r0, r0, r2
|
|
; V7A-NEXT: ldr r12, [sp]
|
|
; V7A-NEXT: subs r2, r2, #32
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r3
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: rsb r1, r12, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bextr64_32_d1:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r3, r2, #32
|
|
; V7A-T-NEXT: lsrs r0, r2
|
|
; V7A-T-NEXT: ldr.w r12, [sp]
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: lsl.w r3, r1, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: rsb.w r1, r12, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bextr64_32_d1:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r7, lr}
|
|
; V6M-NEXT: push {r7, lr}
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldr r1, [sp, #8]
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: pop {r7, pc}
|
|
%shifted = lshr i64 %val, %numskipbits ; the only 64-bit operation
|
|
%truncshifted = trunc i64 %shifted to i32 ; narrow before clearing the high bits
|
|
%numhighbits = sub i32 32, %numlowbits ; number of high bits to clear
|
|
%highbitscleared = shl i32 %truncshifted, %numhighbits
|
|
%masked = lshr i32 %highbitscleared, %numhighbits
|
|
ret i32 %masked
|
|
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Constant
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; https://bugs.llvm.org/show_bug.cgi?id=38938
|
|
; Loads an i64 from %a1, takes the field (%tmp >>u 21) & 1023, uses it as an
; index into the i32 array at %a0, and increments the addressed element in
; place (load, add 1, store back to the same address).
define void @pr38938(ptr %a0, ptr %a1) nounwind {
|
|
; V7M-LABEL: pr38938:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r1, [r1]
|
|
; V7M-NEXT: ubfx r1, r1, #21, #10
|
|
; V7M-NEXT: ldr.w r2, [r0, r1, lsl #2]
|
|
; V7M-NEXT: adds r2, #1
|
|
; V7M-NEXT: str.w r2, [r0, r1, lsl #2]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: pr38938:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r1, [r1]
|
|
; V7A-NEXT: ubfx r1, r1, #21, #10
|
|
; V7A-NEXT: ldr r2, [r0, r1, lsl #2]
|
|
; V7A-NEXT: add r2, r2, #1
|
|
; V7A-NEXT: str r2, [r0, r1, lsl #2]
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: pr38938:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r1, [r1]
|
|
; V7A-T-NEXT: ubfx r1, r1, #21, #10
|
|
; V7A-T-NEXT: ldr.w r2, [r0, r1, lsl #2]
|
|
; V7A-T-NEXT: adds r2, #1
|
|
; V7A-T-NEXT: str.w r2, [r0, r1, lsl #2]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: pr38938:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: ldr r1, [r1]
|
|
; V6M-NEXT: lsrs r1, r1, #19
|
|
; V6M-NEXT: ldr r2, .LCPI51_0
|
|
; V6M-NEXT: ands r2, r1
|
|
; V6M-NEXT: ldr r1, [r0, r2]
|
|
; V6M-NEXT: adds r1, r1, #1
|
|
; V6M-NEXT: str r1, [r0, r2]
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI51_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp = load i64, ptr %a1, align 8
|
|
%tmp1 = lshr i64 %tmp, 21
|
|
%tmp2 = and i64 %tmp1, 1023 ; 1023 = 0x3FF, a 10-bit field
|
|
%tmp3 = getelementptr inbounds i32, ptr %a0, i64 %tmp2 ; element address: %a0 + 4 * %tmp2 bytes
|
|
%tmp4 = load i32, ptr %tmp3, align 4
|
|
%tmp5 = add nsw i32 %tmp4, 1
|
|
store i32 %tmp5, ptr %tmp3, align 4
|
|
ret void
|
|
}
|
|
|
|
; The most canonical variant
|
|
; Constant-operand extraction: returns (%arg >>u 19) & 1023, i.e. the 10-bit
; field of %arg that starts at bit 19, right-aligned.
define i32 @c0_i32(i32 %arg) nounwind {
|
|
; V7M-LABEL: c0_i32:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ubfx r0, r0, #19, #10
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c0_i32:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ubfx r0, r0, #19, #10
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c0_i32:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ubfx r0, r0, #19, #10
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c0_i32:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsls r0, r0, #3
|
|
; V6M-NEXT: lsrs r0, r0, #22
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 1023 ; 1023 = 0x3FF, ten contiguous low bits
|
|
ret i32 %tmp1
|
|
}
|
|
|
|
; Should still be fine, but the mask is shifted
|
|
; Returns (%arg >>u 19) & 4092. The mask 4092 = 0xFFC selects bits 2..11 of the
; shifted value, so the extracted field is not right-aligned in the result.
define i32 @c1_i32(i32 %arg) nounwind {
|
|
; V7M-LABEL: c1_i32:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movw r1, #4092
|
|
; V7M-NEXT: and.w r0, r1, r0, lsr #19
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c1_i32:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: movw r1, #4092
|
|
; V7A-NEXT: and r0, r1, r0, lsr #19
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c1_i32:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movw r1, #4092
|
|
; V7A-T-NEXT: and.w r0, r1, r0, lsr #19
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c1_i32:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r1, r0, #19
|
|
; V6M-NEXT: ldr r0, .LCPI53_0
|
|
; V6M-NEXT: ands r0, r1
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI53_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 4092 ; 4092 = 0xFFC: ten set bits, offset up by two
|
|
ret i32 %tmp1
|
|
}
|
|
|
|
; Should be still fine, but the result is shifted left afterwards
; Returns ((arg >> 19) & 1023) << 2.
; Every run expects code equivalent to (arg >> 17) & 4092, i.e. the trailing
; shl by 2 is absorbed into a smaller right shift and a shifted mask.
; The V6M run loads the mask from the .LCPI54_0 constant after the function.
|
|
define i32 @c2_i32(i32 %arg) nounwind {
|
|
; V7M-LABEL: c2_i32:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movw r1, #4092
|
|
; V7M-NEXT: and.w r0, r1, r0, lsr #17
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c2_i32:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: movw r1, #4092
|
|
; V7A-NEXT: and r0, r1, r0, lsr #17
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c2_i32:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movw r1, #4092
|
|
; V7A-T-NEXT: and.w r0, r1, r0, lsr #17
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c2_i32:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r1, r0, #17
|
|
; V6M-NEXT: ldr r0, .LCPI54_0
|
|
; V6M-NEXT: ands r0, r1
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI54_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 1023
|
|
%tmp2 = shl i32 %tmp1, 2
|
|
ret i32 %tmp2
|
|
}
|
|
|
|
; The mask covers newly shifted-in bit
; Returns (arg >> 19) & 16382 (0x3ffe). After an i32 lshr by 19 only bits 0..12
; can be set, so mask bit 13 falls on a zero that the shift brought in.
; No ubfx is expected here. The V7M, V7A and V7A-T runs check mvn #1 followed by
; an and with an lsr #19 operand; the V6M run checks lsrs #20 then lsls #1.
|
|
define i32 @c4_i32_bad(i32 %arg) nounwind {
|
|
; V7M-LABEL: c4_i32_bad:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mvn r1, #1
|
|
; V7M-NEXT: and.w r0, r1, r0, lsr #19
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c4_i32_bad:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: mvn r1, #1
|
|
; V7A-NEXT: and r0, r1, r0, lsr #19
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c4_i32_bad:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mvn r1, #1
|
|
; V7A-T-NEXT: and.w r0, r1, r0, lsr #19
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c4_i32_bad:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r0, r0, #20
|
|
; V6M-NEXT: lsls r0, r0, #1
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 16382
|
|
ret i32 %tmp1
|
|
}
|
|
|
|
; i64
|
|
|
|
; The most canonical variant
; Returns (arg >> 51) & 1023 on an i64 argument.
; The checks take the field from r1 at bit 19 (51 - 32) and then set r1 to
; zero, so the upper word of the returned value is zero.
; The V7M, V7A and V7A-T runs expect ubfx; the V6M run expects lsls #3, lsrs #22.
|
|
define i64 @c0_i64(i64 %arg) nounwind {
|
|
; V7M-LABEL: c0_i64:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ubfx r0, r1, #19, #10
|
|
; V7M-NEXT: movs r1, #0
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c0_i64:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ubfx r0, r1, #19, #10
|
|
; V7A-NEXT: mov r1, #0
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c0_i64:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ubfx r0, r1, #19, #10
|
|
; V7A-T-NEXT: movs r1, #0
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c0_i64:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsls r0, r1, #3
|
|
; V6M-NEXT: lsrs r0, r0, #22
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 1023
|
|
ret i64 %tmp1
|
|
}
|
|
|
|
; Should be still fine, but the mask is shifted
; Returns (arg >> 51) & 4092 on an i64 argument; the mask 0xffc is the 10-bit
; mask 1023 moved left by two.
; The checks operate on r1 with a shift of 19 (51 - 32) and then zero r1 for
; the upper word of the result. The V7M, V7A and V7A-T runs build the mask with
; movw; the V6M run loads it from the .LCPI57_0 constant after the function.
|
|
define i64 @c1_i64(i64 %arg) nounwind {
|
|
; V7M-LABEL: c1_i64:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movw r0, #4092
|
|
; V7M-NEXT: and.w r0, r0, r1, lsr #19
|
|
; V7M-NEXT: movs r1, #0
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c1_i64:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: movw r0, #4092
|
|
; V7A-NEXT: and r0, r0, r1, lsr #19
|
|
; V7A-NEXT: mov r1, #0
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c1_i64:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movw r0, #4092
|
|
; V7A-T-NEXT: and.w r0, r0, r1, lsr #19
|
|
; V7A-T-NEXT: movs r1, #0
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c1_i64:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r1, r1, #19
|
|
; V6M-NEXT: ldr r0, .LCPI57_0
|
|
; V6M-NEXT: ands r0, r1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI57_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 4092
|
|
ret i64 %tmp1
|
|
}
|
|
|
|
; Should be still fine, but the result is shifted left afterwards
; Returns ((arg >> 51) & 1023) << 2 on an i64 argument.
; Every run expects code equivalent to (r1 >> 17) & 4092 for the low result
; word, with r1 zeroed afterwards for the upper word.
; The V6M run loads the mask from the .LCPI58_0 constant after the function.
|
|
define i64 @c2_i64(i64 %arg) nounwind {
|
|
; V7M-LABEL: c2_i64:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movw r0, #4092
|
|
; V7M-NEXT: and.w r0, r0, r1, lsr #17
|
|
; V7M-NEXT: movs r1, #0
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c2_i64:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: movw r0, #4092
|
|
; V7A-NEXT: and r0, r0, r1, lsr #17
|
|
; V7A-NEXT: mov r1, #0
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c2_i64:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movw r0, #4092
|
|
; V7A-T-NEXT: and.w r0, r0, r1, lsr #17
|
|
; V7A-T-NEXT: movs r1, #0
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c2_i64:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r1, r1, #17
|
|
; V6M-NEXT: ldr r0, .LCPI58_0
|
|
; V6M-NEXT: ands r0, r1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI58_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 1023
|
|
%tmp2 = shl i64 %tmp1, 2
|
|
ret i64 %tmp2
|
|
}
|
|
|
|
; The mask covers newly shifted-in bit
; Returns (arg >> 51) & 16382 (0x3ffe) on an i64 argument. After an i64 lshr by
; 51 only bits 0..12 can be set, so mask bit 13 falls on a shifted-in zero.
; No ubfx is expected. The V7M, V7A and V7A-T runs check mvn #1 plus an and with
; r1 shifted by lsr #19; the V6M run checks lsrs #20 then lsls #1. All runs
; zero r1 afterwards for the upper word of the result.
|
|
define i64 @c4_i64_bad(i64 %arg) nounwind {
|
|
; V7M-LABEL: c4_i64_bad:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mvn r0, #1
|
|
; V7M-NEXT: and.w r0, r0, r1, lsr #19
|
|
; V7M-NEXT: movs r1, #0
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c4_i64_bad:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: mvn r0, #1
|
|
; V7A-NEXT: and r0, r0, r1, lsr #19
|
|
; V7A-NEXT: mov r1, #0
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c4_i64_bad:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mvn r0, #1
|
|
; V7A-T-NEXT: and.w r0, r0, r1, lsr #19
|
|
; V7A-T-NEXT: movs r1, #0
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c4_i64_bad:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r0, r1, #20
|
|
; V6M-NEXT: lsls r0, r0, #1
|
|
; V6M-NEXT: movs r1, #0
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 16382
|
|
ret i64 %tmp1
|
|
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Constant, storing the result afterwards.
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; i32
|
|
|
|
; The most canonical variant
; Computes (arg >> 19) & 1023 and stores it through %ptr instead of returning it.
; The V7M, V7A and V7A-T runs expect ubfx (lsb 19, width 10) followed by str.
; The V6M run expects lsls #3, lsrs #22 and then str.
|
|
define void @c5_i32(i32 %arg, ptr %ptr) nounwind {
|
|
; V7M-LABEL: c5_i32:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ubfx r0, r0, #19, #10
|
|
; V7M-NEXT: str r0, [r1]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c5_i32:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ubfx r0, r0, #19, #10
|
|
; V7A-NEXT: str r0, [r1]
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c5_i32:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ubfx r0, r0, #19, #10
|
|
; V7A-T-NEXT: str r0, [r1]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c5_i32:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsls r0, r0, #3
|
|
; V6M-NEXT: lsrs r0, r0, #22
|
|
; V6M-NEXT: str r0, [r1]
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 1023
|
|
store i32 %tmp1, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; Should be still fine, but the mask is wider (4095, i.e. 12 low bits; it is
; not a shifted mask in this variant)
; Computes (arg >> 19) & 4095 and stores it through %ptr.
; The V7M, V7A and V7A-T runs expect ubfx (lsb 19, width 12) followed by str.
; The V6M run expects lsls #1, lsrs #20 and then str.
|
|
define void @c6_i32(i32 %arg, ptr %ptr) nounwind {
|
|
; V7M-LABEL: c6_i32:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ubfx r0, r0, #19, #12
|
|
; V7M-NEXT: str r0, [r1]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c6_i32:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ubfx r0, r0, #19, #12
|
|
; V7A-NEXT: str r0, [r1]
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c6_i32:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ubfx r0, r0, #19, #12
|
|
; V7A-T-NEXT: str r0, [r1]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c6_i32:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsls r0, r0, #1
|
|
; V6M-NEXT: lsrs r0, r0, #20
|
|
; V6M-NEXT: str r0, [r1]
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 4095
|
|
store i32 %tmp1, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; Should be still fine, but the result is shifted left afterwards
; Computes ((arg >> 19) & 1023) << 2 and stores it through %ptr.
; Every run expects code equivalent to (arg >> 17) & 4092 followed by a str,
; i.e. the trailing shl by 2 is absorbed into the shift amount and the mask.
; The V6M run loads the mask from the .LCPI62_0 constant after the function.
|
|
define void @c7_i32(i32 %arg, ptr %ptr) nounwind {
|
|
; V7M-LABEL: c7_i32:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movw r2, #4092
|
|
; V7M-NEXT: and.w r0, r2, r0, lsr #17
|
|
; V7M-NEXT: str r0, [r1]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c7_i32:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: movw r2, #4092
|
|
; V7A-NEXT: and r0, r2, r0, lsr #17
|
|
; V7A-NEXT: str r0, [r1]
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c7_i32:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movw r2, #4092
|
|
; V7A-T-NEXT: and.w r0, r2, r0, lsr #17
|
|
; V7A-T-NEXT: str r0, [r1]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c7_i32:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: lsrs r0, r0, #17
|
|
; V6M-NEXT: ldr r2, .LCPI62_0
|
|
; V6M-NEXT: ands r2, r0
|
|
; V6M-NEXT: str r2, [r1]
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI62_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp0 = lshr i32 %arg, 19
|
|
%tmp1 = and i32 %tmp0, 1023
|
|
%tmp2 = shl i32 %tmp1, 2
|
|
store i32 %tmp2, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; i64
|
|
|
|
; The most canonical variant
; Computes (arg >> 51) & 1023 on an i64 argument and stores the i64 result
; through %ptr (held in r2 in the checks).
; The field is taken from r1 at bit 19 (51 - 32) and a zero is stored as the
; second word. The V7M and V7A-T runs expect ubfx plus a single strd; the V7A
; run expects ubfx with two str; the V6M run expects lsls #3, lsrs #22 and two str.
|
|
define void @c5_i64(i64 %arg, ptr %ptr) nounwind {
|
|
; V7M-LABEL: c5_i64:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movs r0, #0
|
|
; V7M-NEXT: ubfx r1, r1, #19, #10
|
|
; V7M-NEXT: strd r1, r0, [r2]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c5_i64:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: mov r0, #0
|
|
; V7A-NEXT: str r0, [r2, #4]
|
|
; V7A-NEXT: ubfx r0, r1, #19, #10
|
|
; V7A-NEXT: str r0, [r2]
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c5_i64:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movs r0, #0
|
|
; V7A-T-NEXT: ubfx r1, r1, #19, #10
|
|
; V7A-T-NEXT: strd r1, r0, [r2]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c5_i64:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: lsls r1, r1, #3
|
|
; V6M-NEXT: lsrs r1, r1, #22
|
|
; V6M-NEXT: str r1, [r2]
|
|
; V6M-NEXT: str r0, [r2, #4]
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 1023
|
|
store i64 %tmp1, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; Should be still fine, but the mask is wider (4095, i.e. 12 low bits; it is
; not a shifted mask in this variant)
; Computes (arg >> 51) & 4095 on an i64 argument and stores the i64 result
; through %ptr (held in r2 in the checks), with zero as the second word.
; The V7M and V7A-T runs expect ubfx (lsb 19, width 12) plus a single strd; the
; V7A run expects ubfx with two str; the V6M run expects lsls #1, lsrs #20 and
; two str.
|
|
define void @c6_i64(i64 %arg, ptr %ptr) nounwind {
|
|
; V7M-LABEL: c6_i64:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movs r0, #0
|
|
; V7M-NEXT: ubfx r1, r1, #19, #12
|
|
; V7M-NEXT: strd r1, r0, [r2]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c6_i64:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: mov r0, #0
|
|
; V7A-NEXT: str r0, [r2, #4]
|
|
; V7A-NEXT: ubfx r0, r1, #19, #12
|
|
; V7A-NEXT: str r0, [r2]
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c6_i64:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movs r0, #0
|
|
; V7A-T-NEXT: ubfx r1, r1, #19, #12
|
|
; V7A-T-NEXT: strd r1, r0, [r2]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c6_i64:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: lsls r1, r1, #1
|
|
; V6M-NEXT: lsrs r1, r1, #20
|
|
; V6M-NEXT: str r1, [r2]
|
|
; V6M-NEXT: str r0, [r2, #4]
|
|
; V6M-NEXT: bx lr
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 4095
|
|
store i64 %tmp1, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; Should be still fine, but the result is shifted left afterwards
; Computes ((arg >> 51) & 1023) << 2 on an i64 argument and stores the i64
; result through %ptr (held in r2 in the checks), with zero as the second word.
; Every run expects code equivalent to (r1 >> 17) & 4092 for the first word.
; The V7M and V7A-T runs store with strd, the V7A run with stm, and the V6M run
; with two str after loading the mask from the .LCPI65_0 constant.
|
|
define void @c7_i64(i64 %arg, ptr %ptr) nounwind {
|
|
; V7M-LABEL: c7_i64:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: movs r0, #0
|
|
; V7M-NEXT: movw r3, #4092
|
|
; V7M-NEXT: and.w r1, r3, r1, lsr #17
|
|
; V7M-NEXT: strd r1, r0, [r2]
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: c7_i64:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: movw r0, #4092
|
|
; V7A-NEXT: mov r3, #0
|
|
; V7A-NEXT: and r0, r0, r1, lsr #17
|
|
; V7A-NEXT: stm r2, {r0, r3}
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: c7_i64:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: movs r0, #0
|
|
; V7A-T-NEXT: movw r3, #4092
|
|
; V7A-T-NEXT: and.w r1, r3, r1, lsr #17
|
|
; V7A-T-NEXT: strd r1, r0, [r2]
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: c7_i64:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: lsrs r1, r1, #17
|
|
; V6M-NEXT: ldr r3, .LCPI65_0
|
|
; V6M-NEXT: ands r3, r1
|
|
; V6M-NEXT: str r3, [r2]
|
|
; V6M-NEXT: str r0, [r2, #4]
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.1:
|
|
; V6M-NEXT: .LCPI65_0:
|
|
; V6M-NEXT: .long 4092 @ 0xffc
|
|
%tmp0 = lshr i64 %arg, 51
|
|
%tmp1 = and i64 %tmp0, 1023
|
|
%tmp2 = shl i64 %tmp1, 2
|
|
store i64 %tmp2, ptr %ptr
|
|
ret void
|
|
}
|