llvm-project/llvm/test/CodeGen/ARM/extract-lowbits.ll
Luke Lau 795a115d19
[RegAlloc] Remove default restriction on non-trivial rematerialization (#159211)
In the register allocator we define non-trivial rematerialization as the
rematerialization of an instruction with virtual register uses.

We have been able to perform non-trivial rematerialization for a while,
but it has been prevented by default unless specifically overridden by
the target in `TargetInstrInfo::isReMaterializableImpl`. The
original reasoning for this given by the comment in the default
implementation is because we might increase a live range of the virtual
register, but we don't actually do this.
LiveRangeEdit::allUsesAvailableAt makes sure that we only rematerialize
instructions whose virtual registers are already live at the use sites.

https://reviews.llvm.org/D106408 had originally tried to remove this
restriction but it was reverted after some performance regressions were
reported. We think it is likely that the regressions were caused by the
fact that the old isTriviallyReMaterializable API sometimes returned
true for non-trivial rematerializations.

However https://github.com/llvm/llvm-project/pull/160377 recently split
the API out into a separate non-trivial and trivial version and updated
the call-sites accordingly, and
https://github.com/llvm/llvm-project/pull/160709 and #159180 fixed
heuristics which weren't accounting for the difference between
non-trivial and trivial.

With these fixes in place, this patch proposes to again allow
non-trivial rematerialization by default which reduces a significant
amount of spills and reloads across various targets.

For llvm-test-suite built with -O3 -flto, we get the following geomean
reduction in reloads:

- arm64-apple-darwin: 11.6%
- riscv64-linux-gnu: 8.1%
- x86_64-linux-gnu: 6.5%
2025-10-04 22:50:44 +00:00

2753 lines
74 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
; Patterns:
; a) x & (1 << nbits) - 1
; b) x & ~(-1 << nbits)
; c) x & (-1 >> (32 - nbits))
; d) x << (32 - nbits) >> (32 - nbits)
; are equivalent.
; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;
; Pattern a, i32 baseline: %val is and-ed with ((1 << %numlowbits) - 1).
; Both the value and the bit count arrive as plain i32 arguments.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a0:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a0:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern a, i32, narrow index: the bit count is a zeroext i8 argument that is
; widened to i32 with an explicit zext before it feeds the shift.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
%conv = zext i8 %numlowbits to i32
%onebit = shl i32 1, %conv
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern a, i32, memory operand: the value to be masked is loaded from %w
; rather than passed by value; the bit count is a plain i32 argument.
define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a2_load:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a2_load:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
%val = load i32, ptr %w
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern a, i32, memory operand plus narrow index: the value is loaded from
; %w and the bit count is a zeroext i8 argument widened to i32 with zext.
define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r1, r0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
%val = load i32, ptr %w
%conv = zext i8 %numlowbits to i32
%onebit = shl i32 1, %conv
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern a, i32, commuted: identical to bzhi32_a0 except that the final `and`
; lists %val before %mask, so the match must not depend on operand order.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_a4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: movs r2, #1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: subs r1, #1
; V7M-NEXT: ands r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_a4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: mov r2, #1
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: add r1, r3, r2, lsl r1
; V7A-NEXT: and r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_a4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: subs r1, #1
; V7A-T-NEXT: ands r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_a4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #1
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: subs r1, r2, #1
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %val, %mask ; swapped order
ret i32 %masked
}
; 64-bit
; Pattern a, i64 baseline: %val is and-ed with ((1 << %numlowbits) - 1) on
; 64-bit values. The armv6m expectations perform the 64-bit shift through a
; call to __aeabi_llsl; the other three runs expand it inline.
define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a0:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a0:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r2, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
%onebit = shl i64 1, %numlowbits
%mask = add nsw i64 %onebit, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Check that we don't throw away the vreg_width-1 mask if not using shifts
; Pattern a, i64, with the bit count explicitly and-ed with 63 before the
; shift. The expectations for every run retain an `and` with #63 on the count.
define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a0_masked:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: and r2, r2, #63
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a0_masked:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: and r2, r2, #63
; V7A-NEXT: mov lr, #1
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r2, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a0_masked:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: and r2, r2, #63
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a0_masked:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #63
; V6M-NEXT: ands r2, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
%numlowbits.masked = and i64 %numlowbits, 63
%onebit = shl i64 1, %numlowbits.masked
%mask = add nsw i64 %onebit, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern a, i64, narrow index: the bit count is a zeroext i8 argument that is
; widened to i64 with an explicit zext before it feeds the shift.
define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r2, r0
; V7A-NEXT: and r1, r3, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
%conv = zext i8 %numlowbits to i64
%onebit = shl i64 1, %conv
%mask = add nsw i64 %onebit, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern a, i64, memory operand: the 64-bit value to be masked is loaded from
; %w rather than passed by value; the bit count is a plain i64 argument.
define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a2_load:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r2, #32
; V7M-NEXT: movs r3, #1
; V7M-NEXT: subs.w r12, r2, #32
; V7M-NEXT: lsl.w r2, r3, r2
; V7M-NEXT: lsr.w r1, r3, r1
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: sbc r1, r1, #0
; V7M-NEXT: ands r1, r3
; V7M-NEXT: ands r0, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_a2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r11, lr}
; V7A-NEXT: push {r4, r6, r11, lr}
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: mov r1, #1
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: rsb r0, r2, #32
; V7A-NEXT: subs r4, r2, #32
; V7A-NEXT: lsr r0, r1, r0
; V7A-NEXT: lslpl r0, r1, r4
; V7A-NEXT: lsl r1, r1, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: subs r2, r1, #1
; V7A-NEXT: sbc r0, r0, #0
; V7A-NEXT: and r1, r0, r3
; V7A-NEXT: and r0, r2, r6
; V7A-NEXT: pop {r4, r6, r11, pc}
;
; V7A-T-LABEL: bzhi64_a2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: movs r1, #1
; V7A-T-NEXT: ldrd r12, lr, [r0]
; V7A-T-NEXT: subs.w r0, r2, #32
; V7A-T-NEXT: lsr.w r3, r1, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r1, r0
; V7A-T-NEXT: lsl.w r0, r1, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: subs r0, #1
; V7A-T-NEXT: sbc r1, r3, #0
; V7A-T-NEXT: and.w r0, r0, r12
; V7A-T-NEXT: and.w r1, r1, lr
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r5, #0
; V6M-NEXT: mov r1, r5
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r2, r0, #1
; V6M-NEXT: sbcs r1, r5
; V6M-NEXT: ldm r4!, {r0, r3}
; V6M-NEXT: ands r1, r3
; V6M-NEXT: ands r0, r2
; V6M-NEXT: pop {r4, r5, r7, pc}
%val = load i64, ptr %w
%onebit = shl i64 1, %numlowbits
%mask = add nsw i64 %onebit, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern a, i64, memory operand plus narrow index: the value is loaded from
; %w and the bit count is a zeroext i8 argument widened to i64 with zext.
define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r2, r1, #32
; V7M-NEXT: movs r3, #1
; V7M-NEXT: subs.w r12, r1, #32
; V7M-NEXT: lsl.w r1, r3, r1
; V7M-NEXT: lsr.w r2, r3, r2
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r2, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: subs r3, r1, #1
; V7M-NEXT: sbc r1, r2, #0
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: ands r1, r2
; V7M-NEXT: ands r0, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_a3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r11, lr}
; V7A-NEXT: push {r4, r6, r11, lr}
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: mov r2, #1
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: rsb r0, r1, #32
; V7A-NEXT: subs r4, r1, #32
; V7A-NEXT: lsl r1, r2, r1
; V7A-NEXT: lsr r0, r2, r0
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: lslpl r0, r2, r4
; V7A-NEXT: subs r2, r1, #1
; V7A-NEXT: sbc r0, r0, #0
; V7A-NEXT: and r1, r0, r3
; V7A-NEXT: and r0, r2, r6
; V7A-NEXT: pop {r4, r6, r11, pc}
;
; V7A-T-LABEL: bzhi64_a3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r1, #32
; V7A-T-NEXT: movs r2, #1
; V7A-T-NEXT: ldrd r12, lr, [r0]
; V7A-T-NEXT: subs.w r0, r1, #32
; V7A-T-NEXT: lsr.w r3, r2, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r2, r0
; V7A-T-NEXT: lsl.w r0, r2, r1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: subs r0, #1
; V7A-T-NEXT: sbc r1, r3, #0
; V7A-T-NEXT: and.w r0, r0, r12
; V7A-T-NEXT: and.w r1, r1, lr
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r2, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r5, #0
; V6M-NEXT: mov r1, r5
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r2, r0, #1
; V6M-NEXT: sbcs r1, r5
; V6M-NEXT: ldm r4!, {r0, r3}
; V6M-NEXT: ands r1, r3
; V6M-NEXT: ands r0, r2
; V6M-NEXT: pop {r4, r5, r7, pc}
%val = load i64, ptr %w
%conv = zext i8 %numlowbits to i64
%onebit = shl i64 1, %conv
%mask = add nsw i64 %onebit, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern a, i64, commuted: identical to bzhi64_a0 except that the final `and`
; lists %val before %mask.
define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_a4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: mov.w r12, #1
; V7M-NEXT: subs.w lr, r2, #32
; V7M-NEXT: lsl.w r2, r12, r2
; V7M-NEXT: lsr.w r3, r12, r3
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r12, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: subs r2, #1
; V7M-NEXT: sbc r3, r3, #0
; V7M-NEXT: ands r0, r2
; V7M-NEXT: ands r1, r3
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_a4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: mov lr, #1
; V7A-NEXT: subs r3, r2, #32
; V7A-NEXT: lsl r2, lr, r2
; V7A-NEXT: lsr r12, lr, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: lslpl r12, lr, r3
; V7A-NEXT: subs r2, r2, #1
; V7A-NEXT: sbc r3, r12, #0
; V7A-NEXT: and r0, r0, r2
; V7A-NEXT: and r1, r1, r3
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_a4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #32
; V7A-T-NEXT: mov.w r12, #1
; V7A-T-NEXT: subs.w lr, r2, #32
; V7A-T-NEXT: lsl.w r2, r12, r2
; V7A-T-NEXT: lsr.w r3, r12, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r3, r12, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: subs r2, #1
; V7A-T-NEXT: sbc r3, r3, #0
; V7A-T-NEXT: ands r0, r2
; V7A-T-NEXT: ands r1, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_a4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r6, lr}
; V6M-NEXT: push {r4, r5, r6, lr}
; V6M-NEXT: mov r5, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: movs r6, #0
; V6M-NEXT: mov r1, r6
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: subs r0, r0, #1
; V6M-NEXT: sbcs r1, r6
; V6M-NEXT: ands r1, r5
; V6M-NEXT: ands r0, r4
; V6M-NEXT: pop {r4, r5, r6, pc}
%onebit = shl i64 1, %numlowbits
%mask = add nsw i64 %onebit, -1
%masked = and i64 %val, %mask ; swapped order
ret i64 %masked
}
; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;
; Pattern b, i32 baseline: the mask is built as (-1 << %numlowbits) xor -1 and
; then and-ed with %val. Every run's expectations fold the inversion and the
; `and` into a single bic/bics.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b0:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b0:
; V7A: @ %bb.0:
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern b, i32, narrow index: the bit count is a zeroext i8 argument that is
; widened to i32 with an explicit zext before it feeds the shift of -1.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
%conv = zext i8 %numlowbits to i32
%notmask = shl i32 -1, %conv
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern b, i32, memory operand: the value to be masked is loaded from %w
; rather than passed by value; the bit count is a plain i32 argument.
define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b2_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
%val = load i32, ptr %w
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern b, i32, memory operand plus narrow index: the value is loaded from
; %w and the bit count is a zeroext i8 argument widened to i32 with zext.
define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
%val = load i32, ptr %w
%conv = zext i8 %numlowbits to i32
%notmask = shl i32 -1, %conv
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
ret i32 %masked
}
; Pattern b, i32, commuted: identical to bzhi32_b0 except that the final `and`
; lists %val before %mask.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_b4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: lsl.w r1, r2, r1
; V7M-NEXT: bics r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_b4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: bic r0, r0, r2, lsl r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_b4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: lsl.w r1, r2, r1
; V7A-T-NEXT: bics r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_b4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #0
; V6M-NEXT: mvns r2, r2
; V6M-NEXT: lsls r2, r1
; V6M-NEXT: bics r0, r2
; V6M-NEXT: bx lr
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %val, %mask ; swapped order
ret i32 %masked
}
; 64-bit
; Pattern b, i64 baseline: the mask is built as (-1 << %numlowbits) xor -1 on
; 64-bit values and and-ed with %val. The armv6m expectations perform the
; 64-bit shift through a call to __aeabi_llsl; the other runs expand it inline.
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b0:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsl.w r12, r3, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl.w r12, #0
; V7M-NEXT: it pl
; V7M-NEXT: lslpl r3, r2
; V7M-NEXT: bic.w r0, r0, r12
; V7M-NEXT: bics r1, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b0:
; V7A: @ %bb.0:
; V7A-NEXT: subs r12, r2, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsl r2, r3, r2
; V7A-NEXT: lslpl r3, r3, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r1, r3
; V7A-NEXT: bic r0, r0, r2
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_b0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsl.w r12, r3, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl.w r12, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r3, r2
; V7A-T-NEXT: bic.w r0, r0, r12
; V7A-T-NEXT: bics r1, r3
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: bics r5, r0
; V6M-NEXT: bics r4, r1
; V6M-NEXT: mov r0, r5
; V6M-NEXT: mov r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
%notmask = shl i64 -1, %numlowbits
%mask = xor i64 %notmask, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern b, i64, narrow index: the bit count is a zeroext i8 argument that is
; widened to i64 with an explicit zext before it feeds the shift of -1.
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsl.w r12, r3, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl.w r12, #0
; V7M-NEXT: it pl
; V7M-NEXT: lslpl r3, r2
; V7M-NEXT: bic.w r0, r0, r12
; V7M-NEXT: bics r1, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: subs r12, r2, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsl r2, r3, r2
; V7A-NEXT: lslpl r3, r3, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r1, r3
; V7A-NEXT: bic r0, r0, r2
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_b1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsl.w r12, r3, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl.w r12, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r3, r2
; V7A-T-NEXT: bic.w r0, r0, r12
; V7A-T-NEXT: bics r1, r3
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: bics r5, r0
; V6M-NEXT: bics r4, r1
; V6M-NEXT: mov r0, r5
; V6M-NEXT: mov r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
%conv = zext i8 %numlowbits to i64
%notmask = shl i64 -1, %conv
%mask = xor i64 %notmask, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern b, i64, memory operand: the 64-bit value to be masked is loaded from
; %w rather than passed by value; the bit count is a plain i64 argument.
define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b2_load:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r1, #-1
; V7M-NEXT: subs.w r12, r2, #32
; V7M-NEXT: lsl.w r3, r1, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, r1, r12
; V7M-NEXT: bics r0, r3
; V7M-NEXT: bic.w r1, r2, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, lr}
; V7A-NEXT: push {r4, lr}
; V7A-NEXT: ldr r4, [r0]
; V7A-NEXT: mvn r1, #0
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: subs r0, r2, #32
; V7A-NEXT: lsl r2, r1, r2
; V7A-NEXT: lslpl r1, r1, r0
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r3, r1
; V7A-NEXT: bic r0, r4, r2
; V7A-NEXT: pop {r4, pc}
;
; V7A-T-LABEL: bzhi64_b2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r1, #-1
; V7A-T-NEXT: ldrd r0, r12, [r0]
; V7A-T-NEXT: lsl.w r3, r1, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r1, r2
; V7A-T-NEXT: bics r0, r3
; V7A-T-NEXT: bic.w r1, r12, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: ldm r4!, {r2, r3}
; V6M-NEXT: bics r2, r0
; V6M-NEXT: bics r3, r1
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r1, r3
; V6M-NEXT: pop {r4, pc}
%val = load i64, ptr %w
%notmask = shl i64 -1, %numlowbits
%mask = xor i64 %notmask, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern b, i64, memory operand plus narrow index: the value is loaded from
; %w and the bit count is a zeroext i8 argument widened to i64 with zext.
define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r2, #-1
; V7M-NEXT: subs.w r12, r1, #32
; V7M-NEXT: lsl.w r3, r2, r1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r3, #0
; V7M-NEXT: ldrd r0, r1, [r0]
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r2, r2, r12
; V7M-NEXT: bics r1, r2
; V7M-NEXT: bics r0, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r11, lr}
; V7A-NEXT: push {r4, r6, r11, lr}
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: subs r0, r1, #32
; V7A-NEXT: lsl r4, r2, r1
; V7A-NEXT: lslpl r2, r2, r0
; V7A-NEXT: movwpl r4, #0
; V7A-NEXT: bic r1, r3, r2
; V7A-NEXT: bic r0, r6, r4
; V7A-NEXT: pop {r4, r6, r11, pc}
;
; V7A-T-LABEL: bzhi64_b3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: ldrd r0, r12, [r0]
; V7A-T-NEXT: lsl.w r3, r2, r1
; V7A-T-NEXT: subs r1, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r3, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r2, r1
; V7A-T-NEXT: bics r0, r3
; V7A-T-NEXT: bic.w r1, r12, r2
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: mov r2, r1
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: ldm r4!, {r2, r3}
; V6M-NEXT: bics r2, r0
; V6M-NEXT: bics r3, r1
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r1, r3
; V6M-NEXT: pop {r4, pc}
%val = load i64, ptr %w
%conv = zext i8 %numlowbits to i64
%notmask = shl i64 -1, %conv
%mask = xor i64 %notmask, -1
%masked = and i64 %mask, %val
ret i64 %masked
}
; Pattern b, i64, commuted: identical to bzhi64_b0 except that the final `and`
; lists %val before %mask.
define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_b4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsl.w r12, r3, r2
; V7M-NEXT: subs r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: movpl.w r12, #0
; V7M-NEXT: it pl
; V7M-NEXT: lslpl r3, r2
; V7M-NEXT: bic.w r0, r0, r12
; V7M-NEXT: bics r1, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_b4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: subs r12, r2, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsl r2, r3, r2
; V7A-NEXT: lslpl r3, r3, r12
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: bic r1, r1, r3
; V7A-NEXT: bic r0, r0, r2
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_b4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsl.w r12, r3, r2
; V7A-T-NEXT: subs r2, #32
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl.w r12, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl r3, r2
; V7A-T-NEXT: bic.w r0, r0, r12
; V7A-T-NEXT: bics r1, r3
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_b4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: bics r5, r0
; V6M-NEXT: bics r4, r1
; V6M-NEXT: mov r0, r5
; V6M-NEXT: mov r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
%notmask = shl i64 -1, %numlowbits
%mask = xor i64 %notmask, -1
%masked = and i64 %val, %mask ; swapped order
ret i64 %masked
}
; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;
; bzhi32_c0: keep the low %numlowbits bits of a 32-bit %val.
; The mask is built by logically shifting all-ones right by (32 - %numlowbits)
; and is then ANDed with %val.
; Every expected sequence below avoids materialising the mask: it shifts %val
; left and then right by the same (32 - %numlowbits) amount.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_c0:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_c0:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_c0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_c0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; %numhighbits is the count of high bits to clear; %mask therefore has exactly
; the low %numlowbits bits set.
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %mask, %val
ret i32 %masked
}
; bzhi32_c1_indexzext: 32-bit low-bit extraction where the bit count arrives
; as an i8. The subtraction from 32 is done in i8 and the result is
; zero-extended to i32 before it is used as the shift amount.
; The expected sequences below keep that zero-extension as a uxtb between the
; subtraction and the shift pair.
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_c1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_c1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_c1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_c1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; The i8 subtraction happens before the zext, so only the low 8 bits of the
; difference feed the shift.
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%mask = lshr i32 -1, %sh_prom
%masked = and i32 %mask, %val
ret i32 %masked
}
; bzhi32_c2_load: 32-bit low-bit extraction where the value is loaded from the
; pointer %w instead of being passed directly.
; Computes (-1 >> (32 - %numlowbits)) & load(%w).
define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_c2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_c2_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_c2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_c2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; The loaded value is masked exactly like a register operand would be; the
; expected code again uses a shift-left / shift-right pair on the loaded word.
%val = load i32, ptr %w
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %mask, %val
ret i32 %masked
}
; bzhi32_c3_load_indexzext: 32-bit low-bit extraction combining both variants:
; the value is loaded from %w and the bit count is an i8 that is subtracted
; from 32 in i8 and then zero-extended to form the shift amount.
define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_c3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_c3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_c3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_c3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; Load first, then build the mask from the zero-extended i8 difference.
%val = load i32, ptr %w
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%mask = lshr i32 -1, %sh_prom
%masked = and i32 %mask, %val
ret i32 %masked
}
; bzhi32_c4_commutative: 32-bit low-bit extraction with the 'and' operands in
; the opposite order (%val first, %mask second).
; The mask is -1 >> (32 - %numlowbits). The expected sequences below are the
; same shift-left / shift-right pair regardless of the operand order.
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_c4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_c4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_c4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_c4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; Only the operand order of the final 'and' differs from the non-commuted form.
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %val, %mask ; swapped order
ret i32 %masked
}
; 64-bit
; bzhi64_c0: keep the low %numlowbits bits of a 64-bit %val.
; The mask is built by logically shifting an i64 all-ones value right by
; (64 - %numlowbits) and is then ANDed with %val.
; Three of the expected sequences below expand the 64-bit shift inline with
; predicated instructions; the remaining one shifts an all-ones register pair
; through a call to __aeabi_llsr.
define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_c0:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsbs.w lr, r2, #32
; V7M-NEXT: rsb.w r2, r2, #64
; V7M-NEXT: mov.w r12, #-1
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsr.w r2, r12, r2
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r3, r3, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: ands r0, r3
; V7M-NEXT: ands r1, r2
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_c0:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsbs lr, r2, #32
; V7A-NEXT: rsb r2, r2, #64
; V7A-NEXT: mvn r12, #0
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsr r2, r12, r2
; V7A-NEXT: lsrpl r3, r3, lr
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: and r1, r2, r1
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_c0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsbs.w lr, r2, #32
; V7A-T-NEXT: rsb.w r2, r2, #64
; V7A-T-NEXT: mov.w r12, #-1
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsr.w r2, r12, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r3, r3, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: ands r0, r3
; V7A-T-NEXT: ands r1, r2
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_c0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r2, r0, r2
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
; Unlike the 32-bit form, the expected code here really builds the mask and
; ANDs each 32-bit half of %val with it.
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
%masked = and i64 %mask, %val
ret i64 %masked
}
; bzhi64_c1_indexzext: 64-bit low-bit extraction where the bit count arrives
; as an i8. The subtraction from 64 is done in i8 and the result is
; zero-extended to i64 before it is used as the shift amount for the mask.
; The expected sequences below contain uxtb (and, in two of them, uxtab) to
; implement that zero-extension; one sequence calls __aeabi_llsr instead of
; expanding the 64-bit shift inline.
define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_c1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r2, r2, #64
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: uxtb r2, r2
; V7M-NEXT: subs.w r12, r2, #32
; V7M-NEXT: lsr.w r2, r3, r2
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r3, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: ands r0, r3
; V7M-NEXT: ands r1, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_c1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb lr, r2, #64
; V7A-NEXT: mvn r2, #31
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: uxtb r12, lr
; V7A-NEXT: uxtab r2, r2, lr
; V7A-NEXT: lsr r12, r3, r12
; V7A-NEXT: cmp r2, #0
; V7A-NEXT: movwpl r12, #0
; V7A-NEXT: lsrpl r3, r3, r2
; V7A-NEXT: and r1, r12, r1
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_c1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w lr, r2, #64
; V7A-T-NEXT: mvn r2, #31
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: uxtb.w r12, lr
; V7A-T-NEXT: uxtab r2, r2, lr
; V7A-T-NEXT: lsr.w r12, r3, r12
; V7A-T-NEXT: cmp r2, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl.w r12, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r3, r2
; V7A-T-NEXT: and.w r1, r1, r12
; V7A-T-NEXT: ands r0, r3
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_c1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r0, r0, r2
; V6M-NEXT: uxtb r2, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
; The i8 subtraction happens before the zext, so only the low 8 bits of the
; difference feed the 64-bit shift.
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
%mask = lshr i64 -1, %sh_prom
%masked = and i64 %mask, %val
ret i64 %masked
}
; bzhi64_c2_load: 64-bit low-bit extraction where the value is loaded from the
; pointer %w. Computes (-1 >> (64 - %numlowbits)) & load(%w).
; The expected sequences below load both 32-bit halves (ldrd / ldm) and AND
; each with the matching half of the mask; one sequence obtains the mask from
; a call to __aeabi_llsr and performs the load after that call returns.
define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_c2_load:
; V7M: @ %bb.0:
; V7M-NEXT: rsbs.w r1, r2, #32
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: rsb.w r2, r2, #64
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl r3, r1
; V7M-NEXT: ldrd r0, r1, [r0]
; V7M-NEXT: mov.w r12, #-1
; V7M-NEXT: lsr.w r2, r12, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: ands r0, r3
; V7M-NEXT: ands r1, r2
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_c2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r5, lr}
; V7A-NEXT: push {r5, lr}
; V7A-NEXT: rsbs r1, r2, #32
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: mvn r12, #0
; V7A-NEXT: ldm r0, {r0, r5}
; V7A-NEXT: lsrpl r3, r3, r1
; V7A-NEXT: rsb r1, r2, #64
; V7A-NEXT: and r0, r3, r0
; V7A-NEXT: lsr r1, r12, r1
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: and r1, r1, r5
; V7A-NEXT: pop {r5, pc}
;
; V7A-T-LABEL: bzhi64_c2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsbs.w r1, r2, #32
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: ldrd r0, lr, [r0]
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r3, r1
; V7A-T-NEXT: rsb.w r1, r2, #64
; V7A-T-NEXT: mov.w r12, #-1
; V7A-T-NEXT: and.w r0, r0, r3
; V7A-T-NEXT: lsr.w r1, r12, r1
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: and.w r1, r1, lr
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_c2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r2, r0, r2
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ldm r4!, {r2, r3}
; V6M-NEXT: ands r0, r2
; V6M-NEXT: ands r1, r3
; V6M-NEXT: pop {r4, pc}
; Load the 64-bit value, then mask it with all-ones shifted right by the
; number of high bits to drop.
%val = load i64, ptr %w
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
%masked = and i64 %mask, %val
ret i64 %masked
}
; bzhi64_c3_load_indexzext: 64-bit low-bit extraction combining both variants:
; the value is loaded from %w and the bit count is an i8 that is subtracted
; from 64 in i8 and then zero-extended to i64 to form the mask shift amount.
; One expected sequence below calls __aeabi_llsr to build the mask; the others
; expand the 64-bit shift inline.
define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_c3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #64
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: subs.w r2, r1, #32
; V7M-NEXT: lsr.w r1, r3, r1
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl r3, r2
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: ands r1, r2
; V7M-NEXT: ands r0, r3
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_c3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r4, r6, r11, lr}
; V7A-NEXT: push {r4, r6, r11, lr}
; V7A-NEXT: rsb r1, r1, #64
; V7A-NEXT: mvn r4, #31
; V7A-NEXT: mvn r2, #0
; V7A-NEXT: ldr r6, [r0]
; V7A-NEXT: ldr r3, [r0, #4]
; V7A-NEXT: uxtb r0, r1
; V7A-NEXT: uxtab r4, r4, r1
; V7A-NEXT: lsr r0, r2, r0
; V7A-NEXT: cmp r4, #0
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: and r1, r0, r3
; V7A-NEXT: lsrpl r2, r2, r4
; V7A-NEXT: and r0, r2, r6
; V7A-NEXT: pop {r4, r6, r11, pc}
;
; V7A-T-LABEL: bzhi64_c3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r1, r1, #64
; V7A-T-NEXT: mvn r3, #31
; V7A-T-NEXT: ldrd r12, lr, [r0]
; V7A-T-NEXT: mov.w r2, #-1
; V7A-T-NEXT: uxtb r0, r1
; V7A-T-NEXT: uxtab r3, r3, r1
; V7A-T-NEXT: lsr.w r0, r2, r0
; V7A-T-NEXT: cmp r3, #0
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: and.w r1, r0, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl r2, r3
; V7A-T-NEXT: and.w r0, r2, r12
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_c3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: mov r4, r0
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r0, r0, r1
; V6M-NEXT: uxtb r2, r0
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ldm r4!, {r2, r3}
; V6M-NEXT: ands r0, r2
; V6M-NEXT: ands r1, r3
; V6M-NEXT: pop {r4, pc}
; Load first, then build the 64-bit mask from the zero-extended i8 difference.
%val = load i64, ptr %w
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
%mask = lshr i64 -1, %sh_prom
%masked = and i64 %mask, %val
ret i64 %masked
}
; bzhi64_c4_commutative: 64-bit low-bit extraction with the 'and' operands in
; the opposite order (%val first, %mask second).
; The mask is an i64 all-ones value shifted right by (64 - %numlowbits).
; One expected sequence below calls __aeabi_llsr to build the mask; the others
; expand the 64-bit shift inline with predicated instructions.
define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_c4_commutative:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsbs.w lr, r2, #32
; V7M-NEXT: rsb.w r2, r2, #64
; V7M-NEXT: mov.w r12, #-1
; V7M-NEXT: mov.w r3, #-1
; V7M-NEXT: lsr.w r2, r12, r2
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r3, r3, lr
; V7M-NEXT: it pl
; V7M-NEXT: movpl r2, #0
; V7M-NEXT: ands r0, r3
; V7M-NEXT: ands r1, r2
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_c4_commutative:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsbs lr, r2, #32
; V7A-NEXT: rsb r2, r2, #64
; V7A-NEXT: mvn r12, #0
; V7A-NEXT: mvn r3, #0
; V7A-NEXT: lsr r2, r12, r2
; V7A-NEXT: lsrpl r3, r3, lr
; V7A-NEXT: movwpl r2, #0
; V7A-NEXT: and r0, r0, r3
; V7A-NEXT: and r1, r1, r2
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_c4_commutative:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsbs.w lr, r2, #32
; V7A-T-NEXT: rsb.w r2, r2, #64
; V7A-T-NEXT: mov.w r12, #-1
; V7A-T-NEXT: mov.w r3, #-1
; V7A-T-NEXT: lsr.w r2, r12, r2
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r3, r3, lr
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r2, #0
; V7A-T-NEXT: ands r0, r3
; V7A-T-NEXT: ands r1, r2
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_c4_commutative:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, r5, r7, lr}
; V6M-NEXT: push {r4, r5, r7, lr}
; V6M-NEXT: mov r4, r1
; V6M-NEXT: mov r5, r0
; V6M-NEXT: movs r0, #64
; V6M-NEXT: subs r2, r0, r2
; V6M-NEXT: movs r0, #0
; V6M-NEXT: mvns r0, r0
; V6M-NEXT: mov r1, r0
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: ands r0, r5
; V6M-NEXT: ands r1, r4
; V6M-NEXT: pop {r4, r5, r7, pc}
; Only the operand order of the final 'and' differs from the non-commuted form.
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
%masked = and i64 %val, %mask ; swapped order
ret i64 %masked
}
; ---------------------------------------------------------------------------- ;
; Pattern d: (val << (bitwidth - numlowbits)) >> (bitwidth - numlowbits), logical shifts. 32-bit.
; ---------------------------------------------------------------------------- ;
; bzhi32_d0: keep the low %numlowbits bits of a 32-bit %val without an
; explicit mask: shift %val left by (32 - %numlowbits) to push the unwanted
; high bits out, then logically shift right by the same amount.
; The expected sequences below map directly onto that subtract / shift-left /
; shift-right shape.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_d0:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_d0:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_d0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_d0:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; The same %numhighbits value is used for both shifts, so the right shift
; refills the vacated high positions with zeros.
%numhighbits = sub i32 32, %numlowbits
%highbitscleared = shl i32 %val, %numhighbits
%masked = lshr i32 %highbitscleared, %numhighbits
ret i32 %masked
}
; bzhi32_d1_indexzext: shift-pair form of 32-bit low-bit extraction where the
; bit count arrives as an i8. The subtraction from 32 is done in i8 and the
; result is zero-extended to i32 before being used for both shifts.
; The expected sequences below keep the zero-extension as a uxtb.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_d1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_d1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_d1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_d1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; %sh_prom is the single zero-extended shift amount shared by shl and lshr.
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%highbitscleared = shl i32 %val, %sh_prom
%masked = lshr i32 %highbitscleared, %sh_prom
ret i32 %masked
}
; bzhi32_d2_load: shift-pair form of 32-bit low-bit extraction where the value
; is loaded from the pointer %w. The loaded word is shifted left and then
; logically right by (32 - %numlowbits).
define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_d2_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_d2_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_d2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_d2_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; Load the word, then clear its high bits with the matching shl/lshr pair.
%val = load i32, ptr %w
%numhighbits = sub i32 32, %numlowbits
%highbitscleared = shl i32 %val, %numhighbits
%masked = lshr i32 %highbitscleared, %numhighbits
ret i32 %masked
}
; bzhi32_d3_load_indexzext: shift-pair form of 32-bit low-bit extraction
; combining both variants: the value is loaded from %w and the bit count is an
; i8 that is subtracted from 32 in i8 and then zero-extended to i32.
define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi32_d3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #32
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: lsls r0, r1
; V7M-NEXT: lsrs r0, r1
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_d3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: rsb r1, r1, #32
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: uxtb r1, r1
; V7A-NEXT: lsl r0, r0, r1
; V7A-NEXT: lsr r0, r0, r1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_d3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: rsb.w r1, r1, #32
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: uxtb r1, r1
; V7A-T-NEXT: lsls r0, r1
; V7A-T-NEXT: lsrs r0, r1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_d3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #32
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: uxtb r1, r1
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: lsls r0, r1
; V6M-NEXT: lsrs r0, r1
; V6M-NEXT: bx lr
; Load first; %sh_prom is the zero-extended shift amount shared by both shifts.
%val = load i32, ptr %w
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%highbitscleared = shl i32 %val, %sh_prom
%masked = lshr i32 %highbitscleared, %sh_prom
ret i32 %masked
}
; 64-bit.
; bzhi64_d0: keep the low %numlowbits bits of a 64-bit %val without an
; explicit mask: shift %val left by (64 - %numlowbits), then logically shift
; right by the same amount.
; Three of the expected sequences below expand both 64-bit shifts inline with
; predicated instructions. The remaining one calls __aeabi_llsl and then
; __aeabi_llsr, keeping the shift amount in r4 and copying it into r2 before
; each call.
define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_d0:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r3, r2, #64
; V7M-NEXT: rsbs.w r2, r2, #32
; V7M-NEXT: rsb.w lr, r3, #32
; V7M-NEXT: lsl.w r12, r1, r3
; V7M-NEXT: lsr.w r1, r0, lr
; V7M-NEXT: orr.w r1, r1, r12
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, r0, r2
; V7M-NEXT: lsl.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r0, #0
; V7M-NEXT: lsl.w r12, r1, lr
; V7M-NEXT: lsr.w r0, r0, r3
; V7M-NEXT: orr.w r0, r0, r12
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r2
; V7M-NEXT: lsr.w r1, r1, r3
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_d0:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb lr, r2, #64
; V7A-NEXT: rsbs r2, r2, #32
; V7A-NEXT: rsb r12, lr, #32
; V7A-NEXT: lsr r3, r0, r12
; V7A-NEXT: orr r1, r3, r1, lsl lr
; V7A-NEXT: lslpl r1, r0, r2
; V7A-NEXT: lsl r0, r0, lr
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: lsr r0, r0, lr
; V7A-NEXT: orr r0, r0, r1, lsl r12
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: lsr r1, r1, lr
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_d0:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #64
; V7A-T-NEXT: rsbs.w r2, r2, #32
; V7A-T-NEXT: rsb.w lr, r3, #32
; V7A-T-NEXT: lsl.w r12, r1, r3
; V7A-T-NEXT: lsr.w r1, r0, lr
; V7A-T-NEXT: orr.w r1, r1, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r1, r0, r2
; V7A-T-NEXT: lsl.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsl.w r12, r1, lr
; V7A-T-NEXT: lsr.w r0, r0, r3
; V7A-T-NEXT: orr.w r0, r0, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: lsr.w r1, r1, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_d0:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r3, #64
; V6M-NEXT: subs r4, r3, r2
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: pop {r4, pc}
; The same %numhighbits value is used for both shifts, so the right shift
; refills the vacated high positions with zeros.
%numhighbits = sub i64 64, %numlowbits
%highbitscleared = shl i64 %val, %numhighbits
%masked = lshr i64 %highbitscleared, %numhighbits
ret i64 %masked
}
; bzhi64_d1_indexzext: shift-pair form of 64-bit low-bit extraction where the
; bit count arrives as an i8. The subtraction from 64 is done in i8 and the
; result is zero-extended to i64 before being used for both shifts.
; The expected sequences below contain uxtb (and, in two of them, uxtab) for
; the zero-extension; one sequence calls __aeabi_llsl then __aeabi_llsr with
; the extended amount held in r4.
define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_d1_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r2, r2, #64
; V7M-NEXT: uxtb r2, r2
; V7M-NEXT: rsb.w r3, r2, #32
; V7M-NEXT: lsl.w r12, r1, r2
; V7M-NEXT: lsr.w r1, r0, r3
; V7M-NEXT: orr.w r1, r1, r12
; V7M-NEXT: subs.w r12, r2, #32
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r1, r0, r12
; V7M-NEXT: lsl.w r0, r0, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r0, #0
; V7M-NEXT: lsl.w r3, r1, r3
; V7M-NEXT: lsr.w r0, r0, r2
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r1, r12
; V7M-NEXT: lsr.w r1, r1, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_d1_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r11, lr}
; V7A-NEXT: push {r11, lr}
; V7A-NEXT: rsb lr, r2, #64
; V7A-NEXT: uxtb r3, lr
; V7A-NEXT: rsb r12, r3, #32
; V7A-NEXT: lsr r2, r0, r12
; V7A-NEXT: orr r1, r2, r1, lsl r3
; V7A-NEXT: mvn r2, #31
; V7A-NEXT: uxtab r2, r2, lr
; V7A-NEXT: cmp r2, #0
; V7A-NEXT: lslpl r1, r0, r2
; V7A-NEXT: lsl r0, r0, r3
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: lsr r0, r0, r3
; V7A-NEXT: orr r0, r0, r1, lsl r12
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: lsr r1, r1, r3
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: pop {r11, pc}
;
; V7A-T-LABEL: bzhi64_d1_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: rsb.w r4, r2, #64
; V7A-T-NEXT: mvn r2, #31
; V7A-T-NEXT: uxtb r3, r4
; V7A-T-NEXT: rsb.w lr, r3, #32
; V7A-T-NEXT: lsl.w r12, r1, r3
; V7A-T-NEXT: uxtab r2, r2, r4
; V7A-T-NEXT: lsr.w r1, r0, lr
; V7A-T-NEXT: cmp r2, #0
; V7A-T-NEXT: orr.w r1, r1, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r1, r0, r2
; V7A-T-NEXT: lsl.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsl.w r4, r1, lr
; V7A-T-NEXT: lsr.w r0, r0, r3
; V7A-T-NEXT: orr.w r0, r0, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: lsr.w r1, r1, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bzhi64_d1_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r3, #64
; V6M-NEXT: subs r2, r3, r2
; V6M-NEXT: uxtb r4, r2
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: pop {r4, pc}
; %sh_prom is the single zero-extended shift amount shared by shl and lshr.
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
%highbitscleared = shl i64 %val, %sh_prom
%masked = lshr i64 %highbitscleared, %sh_prom
ret i64 %masked
}
; bzhi64_d2_load: shift-pair form of 64-bit low-bit extraction where the value
; is loaded from the pointer %w. The loaded i64 is shifted left and then
; logically right by (64 - %numlowbits).
; One expected sequence below loads the two words separately and then calls
; __aeabi_llsl followed by __aeabi_llsr; the others expand the shifts inline.
define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_d2_load:
; V7M: @ %bb.0:
; V7M-NEXT: .save {r7, lr}
; V7M-NEXT: push {r7, lr}
; V7M-NEXT: rsb.w r1, r2, #64
; V7M-NEXT: ldrd r0, r3, [r0]
; V7M-NEXT: rsb.w lr, r1, #32
; V7M-NEXT: rsbs.w r2, r2, #32
; V7M-NEXT: lsl.w r12, r3, r1
; V7M-NEXT: lsr.w r3, r0, lr
; V7M-NEXT: orr.w r3, r3, r12
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r3, r0, r2
; V7M-NEXT: lsl.w r0, r0, r1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r0, #0
; V7M-NEXT: lsl.w r12, r3, lr
; V7M-NEXT: lsr.w r0, r0, r1
; V7M-NEXT: lsr.w r1, r3, r1
; V7M-NEXT: orr.w r0, r0, r12
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r3, r2
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: pop {r7, pc}
;
; V7A-LABEL: bzhi64_d2_load:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r5, lr}
; V7A-NEXT: push {r5, lr}
; V7A-NEXT: rsb r3, r2, #64
; V7A-NEXT: ldm r0, {r0, r5}
; V7A-NEXT: rsb r12, r3, #32
; V7A-NEXT: rsbs r2, r2, #32
; V7A-NEXT: lsr r1, r0, r12
; V7A-NEXT: orr r1, r1, r5, lsl r3
; V7A-NEXT: lslpl r1, r0, r2
; V7A-NEXT: lsl r0, r0, r3
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: lsr r0, r0, r3
; V7A-NEXT: orr r0, r0, r1, lsl r12
; V7A-NEXT: lsrpl r0, r1, r2
; V7A-NEXT: lsr r1, r1, r3
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: pop {r5, pc}
;
; V7A-T-LABEL: bzhi64_d2_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r7, lr}
; V7A-T-NEXT: push {r7, lr}
; V7A-T-NEXT: rsb.w r3, r2, #64
; V7A-T-NEXT: ldrd r0, r1, [r0]
; V7A-T-NEXT: rsb.w lr, r3, #32
; V7A-T-NEXT: rsbs.w r2, r2, #32
; V7A-T-NEXT: lsl.w r12, r1, r3
; V7A-T-NEXT: lsr.w r1, r0, lr
; V7A-T-NEXT: orr.w r1, r1, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r1, r0, r2
; V7A-T-NEXT: lsl.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsl.w r12, r1, lr
; V7A-T-NEXT: lsr.w r0, r0, r3
; V7A-T-NEXT: orr.w r0, r0, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r1, r2
; V7A-T-NEXT: lsr.w r1, r1, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: pop {r7, pc}
;
; V6M-LABEL: bzhi64_d2_load:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r1, #64
; V6M-NEXT: subs r4, r1, r2
; V6M-NEXT: ldr r2, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: pop {r4, pc}
; Load the i64, then clear its high bits with the matching shl/lshr pair.
%val = load i64, ptr %w
%numhighbits = sub i64 64, %numlowbits
%highbitscleared = shl i64 %val, %numhighbits
%masked = lshr i64 %highbitscleared, %numhighbits
ret i64 %masked
}
; bzhi64_d3_load_indexzext: shift-pair form of 64-bit low-bit extraction
; combining both variants: the value is loaded from %w and the bit count is an
; i8 that is subtracted from 64 in i8 and then zero-extended to i64.
; One expected sequence below calls __aeabi_llsl followed by __aeabi_llsr with
; the extended amount held in r4; the others expand the shifts inline.
define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; V7M-LABEL: bzhi64_d3_load_indexzext:
; V7M: @ %bb.0:
; V7M-NEXT: rsb.w r1, r1, #64
; V7M-NEXT: ldrd r0, r2, [r0]
; V7M-NEXT: uxtb r1, r1
; V7M-NEXT: rsb.w r3, r1, #32
; V7M-NEXT: lsl.w r12, r2, r1
; V7M-NEXT: lsr.w r2, r0, r3
; V7M-NEXT: orr.w r2, r2, r12
; V7M-NEXT: subs.w r12, r1, #32
; V7M-NEXT: it pl
; V7M-NEXT: lslpl.w r2, r0, r12
; V7M-NEXT: lsl.w r0, r0, r1
; V7M-NEXT: it pl
; V7M-NEXT: movpl r0, #0
; V7M-NEXT: lsl.w r3, r2, r3
; V7M-NEXT: lsr.w r0, r0, r1
; V7M-NEXT: lsr.w r1, r2, r1
; V7M-NEXT: orr.w r0, r0, r3
; V7M-NEXT: it pl
; V7M-NEXT: lsrpl.w r0, r2, r12
; V7M-NEXT: it pl
; V7M-NEXT: movpl r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_d3_load_indexzext:
; V7A: @ %bb.0:
; V7A-NEXT: .save {r5, lr}
; V7A-NEXT: push {r5, lr}
; V7A-NEXT: rsb r1, r1, #64
; V7A-NEXT: ldm r0, {r0, r5}
; V7A-NEXT: uxtb r2, r1
; V7A-NEXT: rsb r12, r2, #32
; V7A-NEXT: lsr r3, r0, r12
; V7A-NEXT: orr r3, r3, r5, lsl r2
; V7A-NEXT: mvn r5, #31
; V7A-NEXT: uxtab r1, r5, r1
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: lslpl r3, r0, r1
; V7A-NEXT: lsl r0, r0, r2
; V7A-NEXT: movwpl r0, #0
; V7A-NEXT: lsr r0, r0, r2
; V7A-NEXT: orr r0, r0, r3, lsl r12
; V7A-NEXT: lsrpl r0, r3, r1
; V7A-NEXT: lsr r1, r3, r2
; V7A-NEXT: movwpl r1, #0
; V7A-NEXT: pop {r5, pc}
;
; V7A-T-LABEL: bzhi64_d3_load_indexzext:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: .save {r4, lr}
; V7A-T-NEXT: push {r4, lr}
; V7A-T-NEXT: rsb.w r4, r1, #64
; V7A-T-NEXT: ldrd r0, r2, [r0]
; V7A-T-NEXT: mvn r1, #31
; V7A-T-NEXT: uxtb r3, r4
; V7A-T-NEXT: rsb.w lr, r3, #32
; V7A-T-NEXT: lsl.w r12, r2, r3
; V7A-T-NEXT: uxtab r1, r1, r4
; V7A-T-NEXT: lsr.w r2, r0, lr
; V7A-T-NEXT: cmp r1, #0
; V7A-T-NEXT: orr.w r2, r2, r12
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lslpl.w r2, r0, r1
; V7A-T-NEXT: lsl.w r0, r0, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r0, #0
; V7A-T-NEXT: lsl.w r4, r2, lr
; V7A-T-NEXT: lsr.w r0, r0, r3
; V7A-T-NEXT: orr.w r0, r0, r4
; V7A-T-NEXT: it pl
; V7A-T-NEXT: lsrpl.w r0, r2, r1
; V7A-T-NEXT: lsr.w r1, r2, r3
; V7A-T-NEXT: it pl
; V7A-T-NEXT: movpl r1, #0
; V7A-T-NEXT: pop {r4, pc}
;
; V6M-LABEL: bzhi64_d3_load_indexzext:
; V6M: @ %bb.0:
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r2, #64
; V6M-NEXT: subs r1, r2, r1
; V6M-NEXT: uxtb r4, r1
; V6M-NEXT: ldr r2, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: mov r0, r2
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsl
; V6M-NEXT: mov r2, r4
; V6M-NEXT: bl __aeabi_llsr
; V6M-NEXT: pop {r4, pc}
; Load first; %sh_prom is the zero-extended shift amount shared by both shifts.
%val = load i64, ptr %w
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
%highbitscleared = shl i64 %val, %sh_prom
%masked = lshr i64 %highbitscleared, %sh_prom
ret i64 %masked
}
; ---------------------------------------------------------------------------- ;
; Constant mask
; ---------------------------------------------------------------------------- ;
; 32-bit
; bzhi32_constant_mask32: AND a 32-bit value with the constant 2147483647
; (0x7fffffff), i.e. clear only bit 31.
; The expected sequences below clear that single bit with a bit-clear
; instruction; one of them first builds the 1 << 31 constant in a register.
define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask32:
; V7M: @ %bb.0:
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask32:
; V7A: @ %bb.0:
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask32:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask32:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: bics r0, r1
; V6M-NEXT: bx lr
; 2147483647 == 2^31 - 1: the low 31 bits are kept.
%masked = and i32 %val, 2147483647
ret i32 %masked
}
; bzhi32_constant_mask32_load: load a 32-bit value through %val and AND it
; with the constant 2147483647 (0x7fffffff), i.e. clear only bit 31.
; The expected sequences below load the word and then clear bit 31 with a
; bit-clear instruction.
define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask32_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask32_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask32_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask32_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r1
; V6M-NEXT: bx lr
; 2147483647 == 2^31 - 1: the low 31 bits of the loaded word are kept.
%val1 = load i32, ptr %val
%masked = and i32 %val1, 2147483647
ret i32 %masked
}
; bzhi32_constant_mask16: AND a 32-bit value with the constant 32767 (0x7fff),
; i.e. keep the low 15 bits.
; Three expected sequences below use a bit-field clear of 17 bits starting at
; bit 15; the remaining one loads the mask from a constant pool entry.
define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask16:
; V7M: @ %bb.0:
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask16:
; V7A: @ %bb.0:
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask16:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask16:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, .LCPI41_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI41_0:
; V6M-NEXT: .long 32767 @ 0x7fff
; 32767 == 2^15 - 1.
%masked = and i32 %val, 32767
ret i32 %masked
}
; bzhi32_constant_mask16_load: load a 32-bit value through %val and AND it
; with the constant 32767 (0x7fff), i.e. keep the low 15 bits.
; Three expected sequences below load the word and apply a bit-field clear;
; the remaining one loads both the word and a constant-pool mask and ANDs them.
define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask16_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask16_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask16_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask16_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: ldr r0, .LCPI42_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI42_0:
; V6M-NEXT: .long 32767 @ 0x7fff
; 32767 == 2^15 - 1.
%val1 = load i32, ptr %val
%masked = and i32 %val1, 32767
ret i32 %masked
}
; bzhi32_constant_mask8: AND a 32-bit value with the constant 127 (0x7f),
; i.e. keep the low 7 bits.
; The expected sequences below use a plain AND with 127, either as an
; immediate operand or after moving 127 into a register.
define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask8:
; V7M: @ %bb.0:
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask8:
; V7A: @ %bb.0:
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask8:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask8:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
; 127 == 2^7 - 1.
%masked = and i32 %val, 127
ret i32 %masked
}
; bzhi32_constant_mask8_load: load a 32-bit value through %val and AND it with
; the constant 127 (0x7f), i.e. keep the low 7 bits.
; The expected sequences below load the full word and then AND it with 127.
define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind {
; V7M-LABEL: bzhi32_constant_mask8_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi32_constant_mask8_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi32_constant_mask8_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi32_constant_mask8_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: movs r0, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: bx lr
; 127 == 2^7 - 1.
%val1 = load i32, ptr %val
%masked = and i32 %val1, 127
ret i32 %masked
}
; 64-bit
; ANDs the i64 argument with 4611686018427387903 (0x3fffffffffffffff), i.e.
; clears the top two bits and keeps bits [61:0].
; Every expectation below modifies only r1 (a `bic`/`bics` with 0xc0000000,
; printed as #-1073741824 or built as 3 << 30), which is consistent with r1
; carrying the high word of the i64; r0 is left untouched.
; NOTE(review): the name says "mask64" but the constant keeps 62 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask64:
; V7M: @ %bb.0:
; V7M-NEXT: bic r1, r1, #-1073741824
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask64:
; V7A: @ %bb.0:
; V7A-NEXT: bic r1, r1, #-1073741824
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask64:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bic r1, r1, #-1073741824
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask64:
; V6M: @ %bb.0:
; V6M-NEXT: movs r2, #3
; V6M-NEXT: lsls r2, r2, #30
; V6M-NEXT: bics r1, r2
; V6M-NEXT: bx lr
%masked = and i64 %val, 4611686018427387903
ret i64 %masked
}
; Loads an i64 through %val and ANDs it with 4611686018427387903
; (0x3fffffffffffffff), i.e. clears the top two bits of the loaded value.
; The three v7 expectations below load both words with one `ldrd` and clear
; the top two bits of r1 with `bic`. The V6M expectation builds 3 << 30 in r3,
; loads the two words separately (offsets 0 and 4), applies `bics` to r1 and
; copies the other word from r2 into r0.
; NOTE(review): the name says "mask64" but the constant keeps 62 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask64_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldrd r0, r1, [r0]
; V7M-NEXT: bic r1, r1, #-1073741824
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask64_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldrd r0, r1, [r0]
; V7A-NEXT: bic r1, r1, #-1073741824
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask64_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldrd r0, r1, [r0]
; V7A-T-NEXT: bic r1, r1, #-1073741824
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask64_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #3
; V6M-NEXT: lsls r3, r1, #30
; V6M-NEXT: ldr r2, [r0]
; V6M-NEXT: ldr r1, [r0, #4]
; V6M-NEXT: bics r1, r3
; V6M-NEXT: mov r0, r2
; V6M-NEXT: bx lr
%val1 = load i64, ptr %val
%masked = and i64 %val1, 4611686018427387903
ret i64 %masked
}
; ANDs the i64 argument with 2147483647 (0x7fffffff), keeping bits [30:0] and
; zeroing everything above.
; Every expectation below clears bit 31 of r0 (a `bic`/`bics` with 0x80000000,
; printed as #-2147483648 or built as 1 << 31) and sets r1 to 0.
; NOTE(review): the name says "mask32" but the constant keeps 31 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask32:
; V7M: @ %bb.0:
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask32:
; V7A: @ %bb.0:
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask32:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask32:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: bics r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
%masked = and i64 %val, 2147483647
ret i64 %masked
}
; Loads an i64 through %val and ANDs it with 2147483647 (0x7fffffff), keeping
; bits [30:0] of the loaded value and zeroing everything above.
; Every expectation below contains a single 32-bit `ldr r0, [r0]` rather than
; a 64-bit load: the word at offset 4 is never read, r1 is simply set to 0, and
; bit 31 of r0 is cleared with `bic`/`bics`.
; NOTE(review): the name says "mask32" but the constant keeps 31 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask32_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bic r0, r0, #-2147483648
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask32_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bic r0, r0, #-2147483648
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask32_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bic r0, r0, #-2147483648
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask32_load:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #1
; V6M-NEXT: lsls r1, r1, #31
; V6M-NEXT: ldr r0, [r0]
; V6M-NEXT: bics r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
%val1 = load i64, ptr %val
%masked = and i64 %val1, 2147483647
ret i64 %masked
}
; ANDs the i64 argument with 32767 (0x7fff), keeping bits [14:0] and zeroing
; everything above.
; The three v7 expectations below clear bits [31:15] of r0 with
; `bfc r0, #15, #17` and set r1 to 0; the V6M expectation loads the mask from
; a literal pool, applies `ands` to r0 and then sets r1 to 0.
; NOTE(review): the name says "mask16" but the constant keeps 15 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask16:
; V7M: @ %bb.0:
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask16:
; V7A: @ %bb.0:
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask16:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask16:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, .LCPI49_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI49_0:
; V6M-NEXT: .long 32767 @ 0x7fff
%masked = and i64 %val, 32767
ret i64 %masked
}
; Loads an i64 through %val and ANDs it with 32767 (0x7fff), keeping bits
; [14:0] of the loaded value and zeroing everything above.
; Every expectation below contains a single 32-bit `ldr` from [r0] rather than
; a 64-bit load, and sets r1 to 0. The three v7 expectations clear bits [31:15]
; of r0 with `bfc r0, #15, #17`; the V6M expectation loads the mask from a
; literal pool and uses `ands`.
; NOTE(review): the name says "mask16" but the constant keeps 15 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask16_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bfc r0, #15, #17
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask16_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bfc r0, #15, #17
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask16_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bfc r0, #15, #17
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask16_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: ldr r0, .LCPI50_0
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.1:
; V6M-NEXT: .LCPI50_0:
; V6M-NEXT: .long 32767 @ 0x7fff
%val1 = load i64, ptr %val
%masked = and i64 %val1, 32767
ret i64 %masked
}
; ANDs the i64 argument with 127 (0x7f), keeping bits [6:0] and zeroing
; everything above.
; The three v7 expectations below use `and r0, r0, #127` and set r1 to 0; the
; V6M expectation materialises 127 in r1, applies `ands` to r0 and then
; overwrites r1 with 0.
; NOTE(review): the name says "mask8" but the constant keeps 7 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask8:
; V7M: @ %bb.0:
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: movs r1, #0
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask8:
; V7A: @ %bb.0:
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: mov r1, #0
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask8:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask8:
; V6M: @ %bb.0:
; V6M-NEXT: movs r1, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
%masked = and i64 %val, 127
ret i64 %masked
}
; Loads an i64 through %val and ANDs it with 127 (0x7f), keeping bits [6:0]
; of the loaded value and zeroing everything above.
; Every expectation below contains a single 32-bit `ldr` from [r0] rather than
; a 64-bit load, and sets r1 to 0. The three v7 expectations mask r0 with
; `and r0, r0, #127`; the V6M expectation loads into r1, materialises 127 in
; r0 and uses `ands`.
; NOTE(review): the name says "mask8" but the constant keeps 7 bits --
; presumably intentional; confirm before changing it.
define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind {
; V7M-LABEL: bzhi64_constant_mask8_load:
; V7M: @ %bb.0:
; V7M-NEXT: ldr r0, [r0]
; V7M-NEXT: movs r1, #0
; V7M-NEXT: and r0, r0, #127
; V7M-NEXT: bx lr
;
; V7A-LABEL: bzhi64_constant_mask8_load:
; V7A: @ %bb.0:
; V7A-NEXT: ldr r0, [r0]
; V7A-NEXT: mov r1, #0
; V7A-NEXT: and r0, r0, #127
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: bzhi64_constant_mask8_load:
; V7A-T: @ %bb.0:
; V7A-T-NEXT: ldr r0, [r0]
; V7A-T-NEXT: movs r1, #0
; V7A-T-NEXT: and r0, r0, #127
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: bzhi64_constant_mask8_load:
; V6M: @ %bb.0:
; V6M-NEXT: ldr r1, [r0]
; V6M-NEXT: movs r0, #127
; V6M-NEXT: ands r0, r1
; V6M-NEXT: movs r1, #0
; V6M-NEXT: bx lr
%val1 = load i64, ptr %val
%masked = and i64 %val1, 127
ret i64 %masked
}