In the register allocator we define non-trivial rematerialization as the rematerialization of an instruction with virtual register uses. We have been able to perform non-trivial rematerialization for a while, but it has been prevented by default unless specifically overridden by the target in `TargetInstrInfo::isReMaterializableImpl`. The original reasoning for this, given by the comment in the default implementation, is that we might increase the live range of the virtual register, but we don't actually do this: LiveRangeEdit::allUsesAvailableAt makes sure that we only rematerialize instructions whose virtual registers are already live at the use sites. https://reviews.llvm.org/D106408 had originally tried to remove this restriction, but it was reverted after some performance regressions were reported. We think it is likely that the regressions were caused by the fact that the old isTriviallyReMaterializable API sometimes returned true for non-trivial rematerializations. However, https://github.com/llvm/llvm-project/pull/160377 recently split the API out into separate non-trivial and trivial versions and updated the call sites accordingly, and https://github.com/llvm/llvm-project/pull/160709 and #159180 fixed heuristics which weren't accounting for the difference between non-trivial and trivial. With these fixes in place, this patch proposes to again allow non-trivial rematerialization by default, which removes a significant number of spills and reloads across various targets. For llvm-test-suite built with -O3 -flto, we get the following geomean reductions in reloads: arm64-apple-darwin: 11.6%; riscv64-linux-gnu: 8.1%; x86_64-linux-gnu: 6.5%.
2753 lines
74 KiB
LLVM
2753 lines
74 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
|
|
; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
|
|
; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
|
|
; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
|
|
|
|
; Patterns:
|
|
; a) x & (1 << nbits) - 1
|
|
; b) x & ~(-1 << nbits)
|
|
; c) x & (-1 >> (32 - y))
|
|
; d) x << (32 - y) >> (32 - y)
|
|
; are equivalent.
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern a. 32-bit
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; Pattern a, base case: computes %val & ((1 << %numlowbits) - 1) on i32.
; The mask is the first operand of the final 'and'.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a0:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a0:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern a with a narrow bit count: %numlowbits arrives as a zeroext i8 and
; is zero-extended to i32 before building the mask (1 << n) - 1.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern a with the value operand loaded from memory: loads an i32 through
; %w and ANDs it with (1 << %numlowbits) - 1.
define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a2_load:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a2_load:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern a combining both variations: the value is loaded through %w and the
; bit count is a zeroext i8 that is zero-extended to i32 before the shift.
define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern a with the operands of the final 'and' swapped (%val first, mask
; second); the mask is still (1 << %numlowbits) - 1.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}
|
|
|
|
; 64-bit
|
|
|
|
; Pattern a on i64: computes %val & ((1 << %numlowbits) - 1) with all
; operations performed at 64-bit width.
define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a0:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a0:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r2, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Check that we don't throw away the vreg_width-1 mask if not using shifts
; Pattern a on i64 where the bit count is first ANDed with 63 in the IR; the
; expected output below keeps that masking (and #63 / movs #63 + ands).
define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a0_masked:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: and r2, r2, #63
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a0_masked:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: and r2, r2, #63
; V7A-NEXT: mov lr, #1
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r2, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a0_masked:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: and r2, r2, #63
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a0_masked:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #63
; V6M-NEXT: ands r2, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
  %numlowbits.masked = and i64 %numlowbits, 63
  %onebit = shl i64 1, %numlowbits.masked
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with a narrow bit count: %numlowbits is a zeroext i8 that
; is zero-extended to i64 before building the mask (1 << n) - 1.
define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r2, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with the value operand loaded from memory: loads an i64
; through %w and ANDs it with (1 << %numlowbits) - 1.
define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a2_load:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: movs r3, #1
; V7M-NEXT: subs.w r12, r2, #32
; V7M-NEXT: lsl.w r2, r3, r2
; V7M-NEXT: lsr.w r1, r3, r1
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: sbc r1, r1, #0
; V7M-NEXT: ands r1, r3
; V7M-NEXT: ands r0, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_a2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r11, lr}
; V7A-NEXT: push {r4, r6, r11, lr}
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: mov r1, #1
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: rsb r0, r2, #32
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: lsr r0, r1, r0
; V7A-NEXT: lslpl r0, r1, r4
; V7A-NEXT: lsl r1, r1, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: subs r2, r1, #1
; V7A-NEXT: sbc r0, r0, #0
; V7A-NEXT: and r1, r0, r3
; V7A-NEXT: and r0, r2, r6
; V7A-NEXT: pop {r4, r6, r11, pc}
;
; V7A-T-LABEL: bzhi64_a2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: ldrd r12, lr, [r0]
; V7A-T-NEXT: subs.w r0, r2, #32
; V7A-T-NEXT: lsr.w r3, r1, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r1, r0
; V7A-T-NEXT: lsl.w r0, r1, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: subs r0, #1
; V7A-T-NEXT: sbc r1, r3, #0
; V7A-T-NEXT: and.w r0, r0, r12
; V7A-T-NEXT: and.w r1, r1, lr
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r5, #0
; V6M-NEXT: mov r1, r5
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r2, r0, #1
; V6M-NEXT: sbcs r1, r5
; V6M-NEXT: ldm r4!, {r0, r3}
; V6M-NEXT: ands r1, r3
; V6M-NEXT: ands r0, r2
; V6M-NEXT: pop {r4, r5, r7, pc}
  %val = load i64, ptr %w
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 combining both variations: the value is loaded through %w
; and the bit count is a zeroext i8 zero-extended to i64 before the shift.
define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r2, r1, #32
; V7M-NEXT: movs r3, #1
; V7M-NEXT: subs.w r12, r1, #32
; V7M-NEXT: lsl.w r1, r3, r1
; V7M-NEXT: lsr.w r2, r3, r2
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r2, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: subs r3, r1, #1
; V7M-NEXT: sbc r1, r2, #0
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: ands r1, r2
; V7M-NEXT: ands r0, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_a3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r11, lr}
; V7A-NEXT: push {r4, r6, r11, lr}
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: mov r2, #1
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: rsb r0, r1, #32
; V7A-NEXT: subs r4, r1, #32
; V7A-NEXT: lsl r1, r2, r1
; V7A-NEXT: lsr r0, r2, r0
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: lslpl r0, r2, r4
; V7A-NEXT: subs r2, r1, #1
; V7A-NEXT: sbc r0, r0, #0
; V7A-NEXT: and r1, r0, r3
; V7A-NEXT: and r0, r2, r6
; V7A-NEXT: pop {r4, r6, r11, pc}
;
; V7A-T-LABEL: bzhi64_a3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r1, #32
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: ldrd r12, lr, [r0]
; V7A-T-NEXT: subs.w r0, r1, #32
; V7A-T-NEXT: lsr.w r3, r2, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r2, r0
; V7A-T-NEXT: lsl.w r0, r2, r1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: subs r0, #1
; V7A-T-NEXT: sbc r1, r3, #0
; V7A-T-NEXT: and.w r0, r0, r12
; V7A-T-NEXT: and.w r1, r1, lr
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r2, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r5, #0
; V6M-NEXT: mov r1, r5
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r2, r0, #1
; V6M-NEXT: sbcs r1, r5
; V6M-NEXT: ldm r4!, {r0, r3}
; V6M-NEXT: ands r1, r3
; V6M-NEXT: ands r0, r2
; V6M-NEXT: pop {r4, r5, r7, pc}
  %val = load i64, ptr %w
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Pattern a on i64 with the operands of the final 'and' swapped (%val first,
; mask second); the mask is still (1 << %numlowbits) - 1.
define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r0, r2
; V7A-NEXT: and r1, r1, r3
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern b. 32-bit
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; Pattern b, base case: computes %val & ~(-1 << %numlowbits) on i32. The
; inverted mask is formed with 'xor ..., -1' and is the first 'and' operand.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b0:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b0:
; V7A: @ %bb.0:
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern b with a narrow bit count: %numlowbits is a zeroext i8 that is
; zero-extended to i32 before building the mask ~(-1 << n).
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern b with the value operand loaded from memory: loads an i32 through
; %w and ANDs it with ~(-1 << %numlowbits).
define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b2_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern b combining both variations: the value is loaded through %w and the
; bit count is a zeroext i8 that is zero-extended to i32 before the shift.
define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
  %val = load i32, ptr %w
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}
|
|
|
|
; Pattern b with the operands of the final 'and' swapped (%val first, mask
; second); the mask is still ~(-1 << %numlowbits).
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}
|
|
|
|
; 64-bit
|
|
|
|
; Pattern b on i64: computes %val & ~(-1 << %numlowbits) with all operations
; performed at 64-bit width.
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b0:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsl.w r12, r3, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl.w r12, #0
; V7M-NEXT: it pl
; V7M-NEXT: lslpl r3, r2
; V7M-NEXT: bic.w r0, r0, r12
; V7M-NEXT: bics r1, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b0:
; V7A: @ %bb.0:
; V7A-NEXT: subs r12, r2, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsl r2, r3, r2
; V7A-NEXT: lslpl r3, r3, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r1, r3
; V7A-NEXT: bic r0, r0, r2
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_b0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsl.w r12, r3, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl.w r12, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r3, r2
; V7A-T-NEXT: bic.w r0, r0, r12
; V7A-T-NEXT: bics r1, r3
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: bics r5, r0
; V6M-NEXT: bics r4, r1
; V6M-NEXT: mov r0, r5
; V6M-NEXT: mov r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Pattern b on i64 with a narrow bit count: %numlowbits is a zeroext i8 that
; is zero-extended to i64 before building the mask ~(-1 << n).
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsl.w r12, r3, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl.w r12, #0
; V7M-NEXT: it pl
; V7M-NEXT: lslpl r3, r2
; V7M-NEXT: bic.w r0, r0, r12
; V7M-NEXT: bics r1, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: subs r12, r2, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsl r2, r3, r2
; V7A-NEXT: lslpl r3, r3, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r1, r3
; V7A-NEXT: bic r0, r0, r2
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_b1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsl.w r12, r3, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl.w r12, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r3, r2
; V7A-T-NEXT: bic.w r0, r0, r12
; V7A-T-NEXT: bics r1, r3
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: bics r5, r0
; V6M-NEXT: bics r4, r1
; V6M-NEXT: mov r0, r5
; V6M-NEXT: mov r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
  %conv = zext i8 %numlowbits to i64
  %notmask = shl i64 -1, %conv
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; Pattern b on i64 with the value operand loaded from memory: loads an i64
; through %w and ANDs it with ~(-1 << %numlowbits).
define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b2_load:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r1, #-1
; V7M-NEXT: subs.w r12, r2, #32
; V7M-NEXT: lsl.w r3, r1, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, r1, r12
; V7M-NEXT: bics r0, r3
; V7M-NEXT: bic.w r1, r2, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, lr}
; V7A-NEXT: push {r4, lr}
; V7A-NEXT: ldr r4, [r0]
; V7A-NEXT: mvn r1, #0
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: subs r0, r2, #32
; V7A-NEXT: lsl r2, r1, r2
; V7A-NEXT: lslpl r1, r1, r0
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r3, r1
; V7A-NEXT: bic r0, r4, r2
; V7A-NEXT: pop {r4, pc}
;
; V7A-T-LABEL: bzhi64_b2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r1, #-1
; V7A-T-NEXT: ldrd r0, r12, [r0]
; V7A-T-NEXT: lsl.w r3, r1, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r1, r2
; V7A-T-NEXT: bics r0, r3
; V7A-T-NEXT: bic.w r1, r12, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: ldm r4!, {r2, r3}
; V6M-NEXT: bics r2, r0
; V6M-NEXT: bics r3, r1
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r1, r3
; V6M-NEXT: pop {r4, pc}
  %val = load i64, ptr %w
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}
|
|
|
|
; bzhi64_b3_load_indexzext: pattern b, 64-bit, loaded value and i8 bit count.
; Loads an i64 from %w, zero-extends the i8 %numlowbits (declared zeroext) to
; i64, builds %mask = (-1 << %conv) ^ -1 and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_b3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mov.w r2, #-1
|
|
; V7M-NEXT: subs.w r12, r1, #32
|
|
; V7M-NEXT: lsl.w r3, r2, r1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r3, #0
|
|
; V7M-NEXT: ldrd r0, r1, [r0]
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r2, r2, r12
|
|
; V7M-NEXT: bics r1, r2
|
|
; V7M-NEXT: bics r0, r3
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi64_b3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r4, r6, r11, lr}
|
|
; V7A-NEXT: push {r4, r6, r11, lr}
|
|
; V7A-NEXT: mvn r2, #0
|
|
; V7A-NEXT: ldr r6, [r0]
|
|
; V7A-NEXT: ldr r3, [r0, #4]
|
|
; V7A-NEXT: subs r0, r1, #32
|
|
; V7A-NEXT: lsl r4, r2, r1
|
|
; V7A-NEXT: lslpl r2, r2, r0
|
|
; V7A-NEXT: movwpl r4, #0
|
|
; V7A-NEXT: bic r1, r3, r2
|
|
; V7A-NEXT: bic r0, r6, r4
|
|
; V7A-NEXT: pop {r4, r6, r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_b3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mov.w r2, #-1
|
|
; V7A-T-NEXT: ldrd r0, r12, [r0]
|
|
; V7A-T-NEXT: lsl.w r3, r2, r1
|
|
; V7A-T-NEXT: subs r1, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r3, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl r2, r1
|
|
; V7A-T-NEXT: bics r0, r3
|
|
; V7A-T-NEXT: bic.w r1, r12, r2
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi64_b3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: mov r2, r1
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: ldm r4!, {r2, r3}
|
|
; V6M-NEXT: bics r2, r0
|
|
; V6M-NEXT: bics r3, r1
|
|
; V6M-NEXT: mov r0, r2
|
|
; V6M-NEXT: mov r1, r3
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%val = load i64, ptr %w
|
|
%conv = zext i8 %numlowbits to i64
|
|
%notmask = shl i64 -1, %conv
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %mask, %val
|
|
ret i64 %masked
|
|
}
|
|
|
|
; bzhi64_b4_commutative: pattern b, 64-bit, with the final 'and' operands
; swapped. Builds %mask = (-1 << %numlowbits) ^ -1 and returns %val & %mask
; (value first, mask second - see the "swapped order" remark on the 'and').
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_b4_commutative:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsl.w r12, r3, r2
|
|
; V7M-NEXT: subs r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl.w r12, #0
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl r3, r2
|
|
; V7M-NEXT: bic.w r0, r0, r12
|
|
; V7M-NEXT: bics r1, r3
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi64_b4_commutative:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: subs r12, r2, #32
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: lsl r2, r3, r2
|
|
; V7A-NEXT: lslpl r3, r3, r12
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: bic r1, r1, r3
|
|
; V7A-NEXT: bic r0, r0, r2
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi64_b4_commutative:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsl.w r12, r3, r2
|
|
; V7A-T-NEXT: subs r2, #32
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl.w r12, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl r3, r2
|
|
; V7A-T-NEXT: bic.w r0, r0, r12
|
|
; V7A-T-NEXT: bics r1, r3
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi64_b4_commutative:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: mov r4, r1
|
|
; V6M-NEXT: mov r5, r0
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: bics r5, r0
|
|
; V6M-NEXT: bics r4, r1
|
|
; V6M-NEXT: mov r0, r5
|
|
; V6M-NEXT: mov r1, r4
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%notmask = shl i64 -1, %numlowbits
|
|
%mask = xor i64 %notmask, -1
|
|
%masked = and i64 %val, %mask ; swapped order
|
|
ret i64 %masked
|
|
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern c. 32-bit.
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; bzhi32_c0: pattern c, 32-bit base case.
; Computes %numhighbits = 32 - %numlowbits, %mask = -1 >> %numhighbits
; (logical shift) and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_c0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_c0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_c0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_c0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%numhighbits = sub i32 32, %numlowbits
|
|
%mask = lshr i32 -1, %numhighbits
|
|
%masked = and i32 %mask, %val
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_c1_indexzext: pattern c, 32-bit, with an i8 bit count.
; Computes %numhighbits = 32 - %numlowbits in i8, zero-extends it to i32,
; forms %mask = -1 >> %sh_prom (logical shift) and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_c1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_c1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_c1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_c1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%numhighbits = sub i8 32, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i32
|
|
%mask = lshr i32 -1, %sh_prom
|
|
%masked = and i32 %mask, %val
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_c2_load: pattern c, 32-bit, with the value loaded from memory.
; Loads an i32 from %w, computes %numhighbits = 32 - %numlowbits,
; %mask = -1 >> %numhighbits (logical shift) and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_c2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_c2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_c2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_c2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w
|
|
%numhighbits = sub i32 32, %numlowbits
|
|
%mask = lshr i32 -1, %numhighbits
|
|
%masked = and i32 %mask, %val
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_c3_load_indexzext: pattern c, 32-bit, loaded value and i8 bit count.
; Loads an i32 from %w, computes %numhighbits = 32 - %numlowbits in i8,
; zero-extends it to i32, forms %mask = -1 >> %sh_prom (logical shift) and
; returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_c3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_c3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_c3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_c3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w
|
|
%numhighbits = sub i8 32, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i32
|
|
%mask = lshr i32 -1, %sh_prom
|
|
%masked = and i32 %mask, %val
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_c4_commutative: pattern c, 32-bit, with the final 'and' operands
; swapped. Computes %numhighbits = 32 - %numlowbits,
; %mask = -1 >> %numhighbits (logical shift) and returns %val & %mask
; (value first, mask second - see the "swapped order" remark on the 'and').
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_c4_commutative:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_c4_commutative:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_c4_commutative:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_c4_commutative:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%numhighbits = sub i32 32, %numlowbits
|
|
%mask = lshr i32 -1, %numhighbits
|
|
%masked = and i32 %val, %mask ; swapped order
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 64-bit.
|
|
|
|
; bzhi64_c0: pattern c, 64-bit base case.
; Computes %numhighbits = 64 - %numlowbits, %mask = -1 >> %numhighbits
; (logical shift) and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_c0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: rsbs.w lr, r2, #32
|
|
; V7M-NEXT: rsb.w r2, r2, #64
|
|
; V7M-NEXT: mov.w r12, #-1
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsr.w r2, r12, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r3, r3, lr
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: ands r0, r3
|
|
; V7M-NEXT: ands r1, r2
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bzhi64_c0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsbs lr, r2, #32
|
|
; V7A-NEXT: rsb r2, r2, #64
|
|
; V7A-NEXT: mvn r12, #0
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: lsr r2, r12, r2
|
|
; V7A-NEXT: lsrpl r3, r3, lr
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: and r0, r3, r0
|
|
; V7A-NEXT: and r1, r2, r1
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_c0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: rsbs.w lr, r2, #32
|
|
; V7A-T-NEXT: rsb.w r2, r2, #64
|
|
; V7A-T-NEXT: mov.w r12, #-1
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsr.w r2, r12, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r3, r3, lr
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r2, #0
|
|
; V7A-T-NEXT: ands r0, r3
|
|
; V7A-T-NEXT: ands r1, r2
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_c0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: mov r4, r1
|
|
; V6M-NEXT: mov r5, r0
|
|
; V6M-NEXT: movs r0, #64
|
|
; V6M-NEXT: subs r2, r0, r2
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ands r0, r5
|
|
; V6M-NEXT: ands r1, r4
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%numhighbits = sub i64 64, %numlowbits
|
|
%mask = lshr i64 -1, %numhighbits
|
|
%masked = and i64 %mask, %val
|
|
ret i64 %masked
|
|
}
|
|
|
|
; bzhi64_c1_indexzext: pattern c, 64-bit, with an i8 bit count.
; Computes %numhighbits = 64 - %numlowbits in i8, zero-extends it to i64,
; forms %mask = -1 >> %sh_prom (logical shift) and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_c1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r2, r2, #64
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: uxtb r2, r2
|
|
; V7M-NEXT: subs.w r12, r2, #32
|
|
; V7M-NEXT: lsr.w r2, r3, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r3, r3, r12
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: ands r0, r3
|
|
; V7M-NEXT: ands r1, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi64_c1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsb lr, r2, #64
|
|
; V7A-NEXT: mvn r2, #31
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: uxtb r12, lr
|
|
; V7A-NEXT: uxtab r2, r2, lr
|
|
; V7A-NEXT: lsr r12, r3, r12
|
|
; V7A-NEXT: cmp r2, #0
|
|
; V7A-NEXT: movwpl r12, #0
|
|
; V7A-NEXT: lsrpl r3, r3, r2
|
|
; V7A-NEXT: and r1, r12, r1
|
|
; V7A-NEXT: and r0, r3, r0
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_c1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: rsb.w lr, r2, #64
|
|
; V7A-T-NEXT: mvn r2, #31
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: uxtb.w r12, lr
|
|
; V7A-T-NEXT: uxtab r2, r2, lr
|
|
; V7A-T-NEXT: lsr.w r12, r3, r12
|
|
; V7A-T-NEXT: cmp r2, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl.w r12, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl r3, r2
|
|
; V7A-T-NEXT: and.w r1, r1, r12
|
|
; V7A-T-NEXT: ands r0, r3
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_c1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: mov r4, r1
|
|
; V6M-NEXT: mov r5, r0
|
|
; V6M-NEXT: movs r0, #64
|
|
; V6M-NEXT: subs r0, r0, r2
|
|
; V6M-NEXT: uxtb r2, r0
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ands r0, r5
|
|
; V6M-NEXT: ands r1, r4
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%numhighbits = sub i8 64, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i64
|
|
%mask = lshr i64 -1, %sh_prom
|
|
%masked = and i64 %mask, %val
|
|
ret i64 %masked
|
|
}
|
|
|
|
; bzhi64_c2_load: pattern c, 64-bit, with the value loaded from memory.
; Loads an i64 from %w, computes %numhighbits = 64 - %numlowbits,
; %mask = -1 >> %numhighbits (logical shift) and returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_c2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsbs.w r1, r2, #32
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: rsb.w r2, r2, #64
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl r3, r1
|
|
; V7M-NEXT: ldrd r0, r1, [r0]
|
|
; V7M-NEXT: mov.w r12, #-1
|
|
; V7M-NEXT: lsr.w r2, r12, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: ands r0, r3
|
|
; V7M-NEXT: ands r1, r2
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi64_c2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r5, lr}
|
|
; V7A-NEXT: push {r5, lr}
|
|
; V7A-NEXT: rsbs r1, r2, #32
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: mvn r12, #0
|
|
; V7A-NEXT: ldm r0, {r0, r5}
|
|
; V7A-NEXT: lsrpl r3, r3, r1
|
|
; V7A-NEXT: rsb r1, r2, #64
|
|
; V7A-NEXT: and r0, r3, r0
|
|
; V7A-NEXT: lsr r1, r12, r1
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: and r1, r1, r5
|
|
; V7A-NEXT: pop {r5, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_c2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: rsbs.w r1, r2, #32
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: ldrd r0, lr, [r0]
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl r3, r1
|
|
; V7A-T-NEXT: rsb.w r1, r2, #64
|
|
; V7A-T-NEXT: mov.w r12, #-1
|
|
; V7A-T-NEXT: and.w r0, r0, r3
|
|
; V7A-T-NEXT: lsr.w r1, r12, r1
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: and.w r1, r1, lr
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_c2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: movs r0, #64
|
|
; V6M-NEXT: subs r2, r0, r2
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldm r4!, {r2, r3}
|
|
; V6M-NEXT: ands r0, r2
|
|
; V6M-NEXT: ands r1, r3
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%val = load i64, ptr %w
|
|
%numhighbits = sub i64 64, %numlowbits
|
|
%mask = lshr i64 -1, %numhighbits
|
|
%masked = and i64 %mask, %val
|
|
ret i64 %masked
|
|
}
|
|
|
|
; bzhi64_c3_load_indexzext: pattern c, 64-bit, loaded value and i8 bit count.
; Loads an i64 from %w, computes %numhighbits = 64 - %numlowbits in i8,
; zero-extends it to i64, forms %mask = -1 >> %sh_prom (logical shift) and
; returns %mask & %val.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_c3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #64
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: subs.w r2, r1, #32
|
|
; V7M-NEXT: lsr.w r1, r3, r1
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl r3, r2
|
|
; V7M-NEXT: ldrd r0, r2, [r0]
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: ands r1, r2
|
|
; V7M-NEXT: ands r0, r3
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi64_c3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r4, r6, r11, lr}
|
|
; V7A-NEXT: push {r4, r6, r11, lr}
|
|
; V7A-NEXT: rsb r1, r1, #64
|
|
; V7A-NEXT: mvn r4, #31
|
|
; V7A-NEXT: mvn r2, #0
|
|
; V7A-NEXT: ldr r6, [r0]
|
|
; V7A-NEXT: ldr r3, [r0, #4]
|
|
; V7A-NEXT: uxtb r0, r1
|
|
; V7A-NEXT: uxtab r4, r4, r1
|
|
; V7A-NEXT: lsr r0, r2, r0
|
|
; V7A-NEXT: cmp r4, #0
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: and r1, r0, r3
|
|
; V7A-NEXT: lsrpl r2, r2, r4
|
|
; V7A-NEXT: and r0, r2, r6
|
|
; V7A-NEXT: pop {r4, r6, r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_c3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: rsb.w r1, r1, #64
|
|
; V7A-T-NEXT: mvn r3, #31
|
|
; V7A-T-NEXT: ldrd r12, lr, [r0]
|
|
; V7A-T-NEXT: mov.w r2, #-1
|
|
; V7A-T-NEXT: uxtb r0, r1
|
|
; V7A-T-NEXT: uxtab r3, r3, r1
|
|
; V7A-T-NEXT: lsr.w r0, r2, r0
|
|
; V7A-T-NEXT: cmp r3, #0
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: and.w r1, r0, lr
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl r2, r3
|
|
; V7A-T-NEXT: and.w r0, r2, r12
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_c3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: mov r4, r0
|
|
; V6M-NEXT: movs r0, #64
|
|
; V6M-NEXT: subs r0, r0, r1
|
|
; V6M-NEXT: uxtb r2, r0
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ldm r4!, {r2, r3}
|
|
; V6M-NEXT: ands r0, r2
|
|
; V6M-NEXT: ands r1, r3
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%val = load i64, ptr %w
|
|
%numhighbits = sub i8 64, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i64
|
|
%mask = lshr i64 -1, %sh_prom
|
|
%masked = and i64 %mask, %val
|
|
ret i64 %masked
|
|
}
|
|
|
|
; bzhi64_c4_commutative: pattern c, 64-bit, with the final 'and' operands
; swapped. Computes %numhighbits = 64 - %numlowbits,
; %mask = -1 >> %numhighbits (logical shift) and returns %val & %mask
; (value first, mask second - see the "swapped order" remark on the 'and').
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_c4_commutative:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: rsbs.w lr, r2, #32
|
|
; V7M-NEXT: rsb.w r2, r2, #64
|
|
; V7M-NEXT: mov.w r12, #-1
|
|
; V7M-NEXT: mov.w r3, #-1
|
|
; V7M-NEXT: lsr.w r2, r12, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r3, r3, lr
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r2, #0
|
|
; V7M-NEXT: ands r0, r3
|
|
; V7M-NEXT: ands r1, r2
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bzhi64_c4_commutative:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsbs lr, r2, #32
|
|
; V7A-NEXT: rsb r2, r2, #64
|
|
; V7A-NEXT: mvn r12, #0
|
|
; V7A-NEXT: mvn r3, #0
|
|
; V7A-NEXT: lsr r2, r12, r2
|
|
; V7A-NEXT: lsrpl r3, r3, lr
|
|
; V7A-NEXT: movwpl r2, #0
|
|
; V7A-NEXT: and r0, r0, r3
|
|
; V7A-NEXT: and r1, r1, r2
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_c4_commutative:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: rsbs.w lr, r2, #32
|
|
; V7A-T-NEXT: rsb.w r2, r2, #64
|
|
; V7A-T-NEXT: mov.w r12, #-1
|
|
; V7A-T-NEXT: mov.w r3, #-1
|
|
; V7A-T-NEXT: lsr.w r2, r12, r2
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r3, r3, lr
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r2, #0
|
|
; V7A-T-NEXT: ands r0, r3
|
|
; V7A-T-NEXT: ands r1, r2
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_c4_commutative:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, r5, r7, lr}
|
|
; V6M-NEXT: push {r4, r5, r7, lr}
|
|
; V6M-NEXT: mov r4, r1
|
|
; V6M-NEXT: mov r5, r0
|
|
; V6M-NEXT: movs r0, #64
|
|
; V6M-NEXT: subs r2, r0, r2
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: mvns r0, r0
|
|
; V6M-NEXT: mov r1, r0
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: ands r0, r5
|
|
; V6M-NEXT: ands r1, r4
|
|
; V6M-NEXT: pop {r4, r5, r7, pc}
|
|
%numhighbits = sub i64 64, %numlowbits
|
|
%mask = lshr i64 -1, %numhighbits
|
|
%masked = and i64 %val, %mask ; swapped order
|
|
ret i64 %masked
|
|
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
|
|
; Pattern d. 32-bit.
|
|
; ---------------------------------------------------------------------------- ;
|
|
|
|
; bzhi32_d0: pattern d, 32-bit base case.
; Computes %numhighbits = 32 - %numlowbits, shifts %val left by that amount
; and then logically right by the same amount, returning the result; no
; explicit mask value is built.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_d0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_d0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_d0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_d0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%numhighbits = sub i32 32, %numlowbits
|
|
%highbitscleared = shl i32 %val, %numhighbits
|
|
%masked = lshr i32 %highbitscleared, %numhighbits
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_d1_indexzext: pattern d, 32-bit, with an i8 bit count.
; Computes %numhighbits = 32 - %numlowbits in i8, zero-extends it to i32,
; shifts %val left by that amount and then logically right by the same
; amount, returning the result.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_d1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_d1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_d1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_d1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%numhighbits = sub i8 32, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i32
|
|
%highbitscleared = shl i32 %val, %sh_prom
|
|
%masked = lshr i32 %highbitscleared, %sh_prom
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_d2_load: pattern d, 32-bit, with the value loaded from memory.
; Loads an i32 from %w, computes %numhighbits = 32 - %numlowbits, shifts the
; loaded value left by that amount and then logically right by the same
; amount, returning the result.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_d2_load:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_d2_load:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_d2_load:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_d2_load:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w
|
|
%numhighbits = sub i32 32, %numlowbits
|
|
%highbitscleared = shl i32 %val, %numhighbits
|
|
%masked = lshr i32 %highbitscleared, %numhighbits
|
|
ret i32 %masked
|
|
}
|
|
|
|
; bzhi32_d3_load_indexzext: pattern d, 32-bit, loaded value and i8 bit count.
; Loads an i32 from %w, computes %numhighbits = 32 - %numlowbits in i8,
; zero-extends it to i32, shifts the loaded value left by that amount and
; then logically right by the same amount, returning the result.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi32_d3_load_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r1, r1, #32
|
|
; V7M-NEXT: ldr r0, [r0]
|
|
; V7M-NEXT: uxtb r1, r1
|
|
; V7M-NEXT: lsls r0, r1
|
|
; V7M-NEXT: lsrs r0, r1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi32_d3_load_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: rsb r1, r1, #32
|
|
; V7A-NEXT: ldr r0, [r0]
|
|
; V7A-NEXT: uxtb r1, r1
|
|
; V7A-NEXT: lsl r0, r0, r1
|
|
; V7A-NEXT: lsr r0, r0, r1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: bzhi32_d3_load_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: rsb.w r1, r1, #32
|
|
; V7A-T-NEXT: ldr r0, [r0]
|
|
; V7A-T-NEXT: uxtb r1, r1
|
|
; V7A-T-NEXT: lsls r0, r1
|
|
; V7A-T-NEXT: lsrs r0, r1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: bzhi32_d3_load_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: movs r2, #32
|
|
; V6M-NEXT: subs r1, r2, r1
|
|
; V6M-NEXT: uxtb r1, r1
|
|
; V6M-NEXT: ldr r0, [r0]
|
|
; V6M-NEXT: lsls r0, r1
|
|
; V6M-NEXT: lsrs r0, r1
|
|
; V6M-NEXT: bx lr
|
|
%val = load i32, ptr %w
|
|
%numhighbits = sub i8 32, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i32
|
|
%highbitscleared = shl i32 %val, %sh_prom
|
|
%masked = lshr i32 %highbitscleared, %sh_prom
|
|
ret i32 %masked
|
|
}
|
|
|
|
; 64-bit.
|
|
|
|
; bzhi64_d0: pattern d, 64-bit base case.
; Computes %numhighbits = 64 - %numlowbits, shifts %val left by that amount
; and then logically right by the same amount, returning the result; no
; explicit mask value is built.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_d0:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: .save {r7, lr}
|
|
; V7M-NEXT: push {r7, lr}
|
|
; V7M-NEXT: rsb.w r3, r2, #64
|
|
; V7M-NEXT: rsbs.w r2, r2, #32
|
|
; V7M-NEXT: rsb.w lr, r3, #32
|
|
; V7M-NEXT: lsl.w r12, r1, r3
|
|
; V7M-NEXT: lsr.w r1, r0, lr
|
|
; V7M-NEXT: orr.w r1, r1, r12
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r1, r0, r2
|
|
; V7M-NEXT: lsl.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsl.w r12, r1, lr
|
|
; V7M-NEXT: lsr.w r0, r0, r3
|
|
; V7M-NEXT: orr.w r0, r0, r12
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r2
|
|
; V7M-NEXT: lsr.w r1, r1, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: pop {r7, pc}
|
|
;
|
|
; V7A-LABEL: bzhi64_d0:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsb lr, r2, #64
|
|
; V7A-NEXT: rsbs r2, r2, #32
|
|
; V7A-NEXT: rsb r12, lr, #32
|
|
; V7A-NEXT: lsr r3, r0, r12
|
|
; V7A-NEXT: orr r1, r3, r1, lsl lr
|
|
; V7A-NEXT: lslpl r1, r0, r2
|
|
; V7A-NEXT: lsl r0, r0, lr
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lsr r0, r0, lr
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r12
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: lsr r1, r1, lr
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_d0:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r7, lr}
|
|
; V7A-T-NEXT: push {r7, lr}
|
|
; V7A-T-NEXT: rsb.w r3, r2, #64
|
|
; V7A-T-NEXT: rsbs.w r2, r2, #32
|
|
; V7A-T-NEXT: rsb.w lr, r3, #32
|
|
; V7A-T-NEXT: lsl.w r12, r1, r3
|
|
; V7A-T-NEXT: lsr.w r1, r0, lr
|
|
; V7A-T-NEXT: orr.w r1, r1, r12
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r1, r0, r2
|
|
; V7A-T-NEXT: lsl.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsl.w r12, r1, lr
|
|
; V7A-T-NEXT: lsr.w r0, r0, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r12
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: lsr.w r1, r1, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: pop {r7, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_d0:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: movs r3, #64
|
|
; V6M-NEXT: subs r4, r3, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%numhighbits = sub i64 64, %numlowbits
|
|
%highbitscleared = shl i64 %val, %numhighbits
|
|
%masked = lshr i64 %highbitscleared, %numhighbits
|
|
ret i64 %masked
|
|
}
|
|
|
|
; bzhi64_d1_indexzext: pattern d, 64-bit, with an i8 bit count.
; Computes %numhighbits = 64 - %numlowbits in i8, zero-extends it to i64,
; shifts %val left by that amount and then logically right by the same
; amount, returning the result.
; The assertion lines in the body are autogenerated llc expectations (see the
; NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): shifting by the bit width or more is poison per the LLVM
; LangRef, so only in-range %numlowbits is meaningful - confirm.
define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
|
|
; V7M-LABEL: bzhi64_d1_indexzext:
|
|
; V7M: @ %bb.0:
|
|
; V7M-NEXT: rsb.w r2, r2, #64
|
|
; V7M-NEXT: uxtb r2, r2
|
|
; V7M-NEXT: rsb.w r3, r2, #32
|
|
; V7M-NEXT: lsl.w r12, r1, r2
|
|
; V7M-NEXT: lsr.w r1, r0, r3
|
|
; V7M-NEXT: orr.w r1, r1, r12
|
|
; V7M-NEXT: subs.w r12, r2, #32
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lslpl.w r1, r0, r12
|
|
; V7M-NEXT: lsl.w r0, r0, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r0, #0
|
|
; V7M-NEXT: lsl.w r3, r1, r3
|
|
; V7M-NEXT: lsr.w r0, r0, r2
|
|
; V7M-NEXT: orr.w r0, r0, r3
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: lsrpl.w r0, r1, r12
|
|
; V7M-NEXT: lsr.w r1, r1, r2
|
|
; V7M-NEXT: it pl
|
|
; V7M-NEXT: movpl r1, #0
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: bzhi64_d1_indexzext:
|
|
; V7A: @ %bb.0:
|
|
; V7A-NEXT: .save {r11, lr}
|
|
; V7A-NEXT: push {r11, lr}
|
|
; V7A-NEXT: rsb lr, r2, #64
|
|
; V7A-NEXT: uxtb r3, lr
|
|
; V7A-NEXT: rsb r12, r3, #32
|
|
; V7A-NEXT: lsr r2, r0, r12
|
|
; V7A-NEXT: orr r1, r2, r1, lsl r3
|
|
; V7A-NEXT: mvn r2, #31
|
|
; V7A-NEXT: uxtab r2, r2, lr
|
|
; V7A-NEXT: cmp r2, #0
|
|
; V7A-NEXT: lslpl r1, r0, r2
|
|
; V7A-NEXT: lsl r0, r0, r3
|
|
; V7A-NEXT: movwpl r0, #0
|
|
; V7A-NEXT: lsr r0, r0, r3
|
|
; V7A-NEXT: orr r0, r0, r1, lsl r12
|
|
; V7A-NEXT: lsrpl r0, r1, r2
|
|
; V7A-NEXT: lsr r1, r1, r3
|
|
; V7A-NEXT: movwpl r1, #0
|
|
; V7A-NEXT: pop {r11, pc}
|
|
;
|
|
; V7A-T-LABEL: bzhi64_d1_indexzext:
|
|
; V7A-T: @ %bb.0:
|
|
; V7A-T-NEXT: .save {r4, lr}
|
|
; V7A-T-NEXT: push {r4, lr}
|
|
; V7A-T-NEXT: rsb.w r4, r2, #64
|
|
; V7A-T-NEXT: mvn r2, #31
|
|
; V7A-T-NEXT: uxtb r3, r4
|
|
; V7A-T-NEXT: rsb.w lr, r3, #32
|
|
; V7A-T-NEXT: lsl.w r12, r1, r3
|
|
; V7A-T-NEXT: uxtab r2, r2, r4
|
|
; V7A-T-NEXT: lsr.w r1, r0, lr
|
|
; V7A-T-NEXT: cmp r2, #0
|
|
; V7A-T-NEXT: orr.w r1, r1, r12
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lslpl.w r1, r0, r2
|
|
; V7A-T-NEXT: lsl.w r0, r0, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r0, #0
|
|
; V7A-T-NEXT: lsl.w r4, r1, lr
|
|
; V7A-T-NEXT: lsr.w r0, r0, r3
|
|
; V7A-T-NEXT: orr.w r0, r0, r4
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: lsrpl.w r0, r1, r2
|
|
; V7A-T-NEXT: lsr.w r1, r1, r3
|
|
; V7A-T-NEXT: it pl
|
|
; V7A-T-NEXT: movpl r1, #0
|
|
; V7A-T-NEXT: pop {r4, pc}
|
|
;
|
|
; V6M-LABEL: bzhi64_d1_indexzext:
|
|
; V6M: @ %bb.0:
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: movs r3, #64
|
|
; V6M-NEXT: subs r2, r3, r2
|
|
; V6M-NEXT: uxtb r4, r2
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsl
|
|
; V6M-NEXT: mov r2, r4
|
|
; V6M-NEXT: bl __aeabi_llsr
|
|
; V6M-NEXT: pop {r4, pc}
|
|
%numhighbits = sub i8 64, %numlowbits
|
|
%sh_prom = zext i8 %numhighbits to i64
|
|
%highbitscleared = shl i64 %val, %sh_prom
|
|
%masked = lshr i64 %highbitscleared, %sh_prom
|
|
ret i64 %masked
|
|
}
|
|
|
|
; Loads an i64 from %w and clears its high (64 - %numlowbits) bits by shifting
; left and then logically right by that same amount.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_d2_load:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r1, r2, #64
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: rsb.w lr, r1, #32
; V7M-NEXT: rsbs.w r2, r2, #32
; V7M-NEXT: lsl.w r12, r3, r1
; V7M-NEXT: lsr.w r3, r0, lr
; V7M-NEXT: orr.w r3, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r0, r2
; V7M-NEXT: lsl.w r0, r0, r1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r0, #0
; V7M-NEXT: lsl.w r12, r3, lr
; V7M-NEXT: lsr.w r0, r0, r1
; V7M-NEXT: lsr.w r1, r3, r1
; V7M-NEXT: orr.w r0, r0, r12
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r3, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_d2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r5, lr}
; V7A-NEXT: push {r5, lr}
; V7A-NEXT: rsb r3, r2, #64
; V7A-NEXT: ldm r0, {r0, r5}
; V7A-NEXT: rsb r12, r3, #32
; V7A-NEXT: rsbs r2, r2, #32
; V7A-NEXT: lsr r1, r0, r12
; V7A-NEXT: orr r1, r1, r5, lsl r3
; V7A-NEXT: lslpl r1, r0, r2
; V7A-NEXT: lsl r0, r0, r3
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: lsr r0, r0, r3
; V7A-NEXT: orr r0, r0, r1, lsl r12
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: lsr r1, r1, r3
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: pop {r5, pc}
;
; V7A-T-LABEL: bzhi64_d2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #64
; V7A-T-NEXT: ldrd r0, r1, [r0]
; V7A-T-NEXT: rsb.w lr, r3, #32
; V7A-T-NEXT: rsbs.w r2, r2, #32
; V7A-T-NEXT: lsl.w r12, r1, r3
; V7A-T-NEXT: lsr.w r1, r0, lr
; V7A-T-NEXT: orr.w r1, r1, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r1, r0, r2
; V7A-T-NEXT: lsl.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsl.w r12, r1, lr
; V7A-T-NEXT: lsr.w r0, r0, r3
; V7A-T-NEXT: orr.w r0, r0, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: lsr.w r1, r1, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_d2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r1, #64
; V6M-NEXT: subs r4, r1, r2
; V6M-NEXT: ldr r2, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: pop {r4, pc}
  %val = load i64, ptr %w
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  ret i64 %masked
}
|
|
|
|
; Loads an i64 from %w and clears its high bits with a shl/lshr pair. The
; shift amount is computed in i8 as (64 - %numlowbits) and then zero-extended
; to i64 before being used by both shifts.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_d3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #64
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: rsb.w r3, r1, #32
; V7M-NEXT: lsl.w r12, r2, r1
; V7M-NEXT: lsr.w r2, r0, r3
; V7M-NEXT: orr.w r2, r2, r12
; V7M-NEXT: subs.w r12, r1, #32
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r2, r0, r12
; V7M-NEXT: lsl.w r0, r0, r1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r0, #0
; V7M-NEXT: lsl.w r3, r2, r3
; V7M-NEXT: lsr.w r0, r0, r1
; V7M-NEXT: lsr.w r1, r2, r1
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r2, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_d3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r5, lr}
; V7A-NEXT: push {r5, lr}
; V7A-NEXT: rsb r1, r1, #64
; V7A-NEXT: ldm r0, {r0, r5}
; V7A-NEXT: uxtb r2, r1
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: lsr r3, r0, r12
; V7A-NEXT: orr r3, r3, r5, lsl r2
; V7A-NEXT: mvn r5, #31
; V7A-NEXT: uxtab r1, r5, r1
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: lslpl r3, r0, r1
; V7A-NEXT: lsl r0, r0, r2
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: orr r0, r0, r3, lsl r12
; V7A-NEXT: lsrpl r0, r3, r1
; V7A-NEXT: lsr r1, r3, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: pop {r5, pc}
;
; V7A-T-LABEL: bzhi64_d3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: rsb.w r4, r1, #64
; V7A-T-NEXT: ldrd r0, r2, [r0]
; V7A-T-NEXT: mvn r1, #31
; V7A-T-NEXT: uxtb r3, r4
; V7A-T-NEXT: rsb.w lr, r3, #32
; V7A-T-NEXT: lsl.w r12, r2, r3
; V7A-T-NEXT: uxtab r1, r1, r4
; V7A-T-NEXT: lsr.w r2, r0, lr
; V7A-T-NEXT: cmp r1, #0
; V7A-T-NEXT: orr.w r2, r2, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r2, r0, r1
; V7A-T-NEXT: lsl.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsl.w r4, r2, lr
; V7A-T-NEXT: lsr.w r0, r0, r3
; V7A-T-NEXT: orr.w r0, r0, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r2, r1
; V7A-T-NEXT: lsr.w r1, r2, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bzhi64_d3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r2, #64
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: uxtb r4, r1
; V6M-NEXT: ldr r2, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: pop {r4, pc}
  %val = load i64, ptr %w
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %highbitscleared = shl i64 %val, %sh_prom
  %masked = lshr i64 %highbitscleared, %sh_prom
  ret i64 %masked
}
|
|
|
|
; ---------------------------------------------------------------------------- ;
; Constant mask
; ---------------------------------------------------------------------------- ;

; 32-bit
|
|
|
|
; ANDs an i32 argument with the constant 2147483647 (0x7fffffff), i.e. clears
; only the top bit; note the mask covers 31 bits despite the "32" in the name.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask32:
; V7M: @ %bb.0:
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask32:
; V7A: @ %bb.0:
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask32:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask32:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: bics r0, r1
; V6M-NEXT: bx lr
  %masked = and i32 %val, 2147483647
  ret i32 %masked
}
|
|
|
|
; Loads an i32 from %val and ANDs it with the constant 2147483647
; (0x7fffffff), i.e. clears only the top bit of the loaded value.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask32_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask32_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask32_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask32_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r1
; V6M-NEXT: bx lr
  %val1 = load i32, ptr %val
  %masked = and i32 %val1, 2147483647
  ret i32 %masked
}
|
|
|
|
; ANDs an i32 argument with the constant 32767 (0x7fff), keeping the low
; 15 bits.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask16:
; V7M: @ %bb.0:
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask16:
; V7A: @ %bb.0:
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask16:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask16:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, .LCPI41_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI41_0:
; V6M-NEXT: .long 32767 @ 0x7fff
  %masked = and i32 %val, 32767
  ret i32 %masked
}
|
|
|
|
; Loads an i32 from %val and ANDs it with the constant 32767 (0x7fff),
; keeping the low 15 bits of the loaded value.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask16_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask16_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask16_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask16_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: ldr r0, .LCPI42_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI42_0:
; V6M-NEXT: .long 32767 @ 0x7fff
  %val1 = load i32, ptr %val
  %masked = and i32 %val1, 32767
  ret i32 %masked
}
|
|
|
|
; ANDs an i32 argument with the constant 127 (0x7f), keeping the low 7 bits.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask8:
; V7M: @ %bb.0:
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask8:
; V7A: @ %bb.0:
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask8:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask8:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %masked = and i32 %val, 127
  ret i32 %masked
}
|
|
|
|
; Loads an i32 from %val and ANDs it with the constant 127 (0x7f), keeping the
; low 7 bits of the loaded value.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask8_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask8_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask8_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask8_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: movs r0, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
  %val1 = load i32, ptr %val
  %masked = and i32 %val1, 127
  ret i32 %masked
}
|
|
|
|
; 64-bit
|
|
|
|
; ANDs an i64 argument with the constant 4611686018427387903
; (0x3fffffffffffffff), i.e. clears the top two bits.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask64:
; V7M: @ %bb.0:
; V7M-NEXT: bic r1, r1, #-1073741824
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask64:
; V7A: @ %bb.0:
; V7A-NEXT: bic r1, r1, #-1073741824
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask64:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bic r1, r1, #-1073741824
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask64:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #3
; V6M-NEXT: lsls r2, r2, #30
; V6M-NEXT: bics r1, r2
; V6M-NEXT: bx lr
  %masked = and i64 %val, 4611686018427387903
  ret i64 %masked
}
|
|
|
|
; Loads an i64 from %val and ANDs it with the constant 4611686018427387903
; (0x3fffffffffffffff), i.e. clears the top two bits of the loaded value.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask64_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldrd r0, r1, [r0]
; V7M-NEXT: bic r1, r1, #-1073741824
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask64_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldrd r0, r1, [r0]
; V7A-NEXT: bic r1, r1, #-1073741824
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask64_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldrd r0, r1, [r0]
; V7A-T-NEXT: bic r1, r1, #-1073741824
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask64_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #3
; V6M-NEXT: lsls r3, r1, #30
; V6M-NEXT: ldr r2, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: bics r1, r3
; V6M-NEXT: mov r0, r2
; V6M-NEXT: bx lr
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 4611686018427387903
  ret i64 %masked
}
|
|
|
|
; ANDs an i64 argument with the constant 2147483647 (0x7fffffff), keeping the
; low 31 bits; the upper half of the result is therefore always zero.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask32:
; V7M: @ %bb.0:
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask32:
; V7A: @ %bb.0:
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask32:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask32:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: bics r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
  %masked = and i64 %val, 2147483647
  ret i64 %masked
}
|
|
|
|
; Loads an i64 from %val and ANDs it with the constant 2147483647
; (0x7fffffff), keeping the low 31 bits; the upper half of the result is
; therefore always zero.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask32_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask32_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask32_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask32_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 2147483647
  ret i64 %masked
}
|
|
|
|
; ANDs an i64 argument with the constant 32767 (0x7fff), keeping the low
; 15 bits; the upper half of the result is therefore always zero.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask16:
; V7M: @ %bb.0:
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask16:
; V7A: @ %bb.0:
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask16:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask16:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, .LCPI49_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI49_0:
; V6M-NEXT: .long 32767 @ 0x7fff
  %masked = and i64 %val, 32767
  ret i64 %masked
}
|
|
|
|
; Loads an i64 from %val and ANDs it with the constant 32767 (0x7fff), keeping
; the low 15 bits; the upper half of the result is therefore always zero.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask16_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask16_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask16_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask16_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: ldr r0, .LCPI50_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI50_0:
; V6M-NEXT: .long 32767 @ 0x7fff
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 32767
  ret i64 %masked
}
|
|
|
|
; ANDs an i64 argument with the constant 127 (0x7f), keeping the low 7 bits;
; the upper half of the result is therefore always zero.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask8:
; V7M: @ %bb.0:
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask8:
; V7A: @ %bb.0:
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask8:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask8:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
  %masked = and i64 %val, 127
  ret i64 %masked
}
|
|
|
|
; Loads an i64 from %val and ANDs it with the constant 127 (0x7f), keeping the
; low 7 bits; the upper half of the result is therefore always zero.
; The per-target assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask8_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: movs r1, #0
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask8_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r1, #0
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask8_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask8_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: movs r0, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 127
  ret i64 %masked
}
|