llvm-project/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
Ruiling, Song 0487db1f13
MachineScheduler: Improve instruction clustering (#137784)
The existing way of managing clustered nodes was done through adding
weak edges between the neighbouring cluster nodes, which is a sort of
ordered queue. And this will be later recorded as `NextClusterPred` or
`NextClusterSucc` in `ScheduleDAGMI`.

But actually the instruction may be picked not in the exact order of the
queue. For example, we have a queue of cluster nodes A B C. But during
scheduling, node B might be picked first, then it will be very likely
that we only cluster B and C for Top-Down scheduling (leaving A alone).

Another issue is:
```
   if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
      std::swap(SUa, SUb);
   if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
```
may break the cluster queue.

For example, we want to cluster nodes (in the order given in `MemOpRecords`): 1 3
2. Normally, 1(SUa) will become the pred of 3(SUb). But when it comes to (3, 2),
as 3(SUa) > 2(SUb), we would reorder the two nodes, which makes 2 the
pred of 3. This makes both 1 and 2 preds of 3, but there is no
edge between 1 and 2. Thus we get a broken cluster chain.

To fix both issues, we introduce an unordered set in this change. This
should help improve clustering in some hard cases.

One key reason the change causes so many test check changes is: As the
cluster candidates are not ordered now, the candidates might be picked
in different order from before.

The most affected targets are: AMDGPU, AArch64, RISCV.

For RISC-V, most changes seem to be just minor instruction reorderings; I
don't see any obvious regressions.

For AArch64, some combining of ldr into ldp was affected, with two cases
regressed and two improved. The deeper reason is that the machine
scheduler cannot cluster them well either before or after the change, and
the load-combine algorithm that runs later is also not smart enough.

For AMDGPU, some cases use more v_dual instructions while some are
regressed; it seems less critical. Test `v_vselect_v32bf16` appears to
get more buffer_load instructions claused.
2025-06-05 15:28:04 +08:00

5922 lines
192 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV64I
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV32I
define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_4bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lb a0, 3(a0)
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: srli a4, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_4bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 2(a0)
; RV32I-NEXT: lbu a5, 3(a0)
; RV32I-NEXT: lbu a0, 0(a0)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a5, a5, 24
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
; RV32I-NEXT: lbu a6, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: or a0, a4, a0
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%byteOff = load i32, ptr %byteOff.ptr, align 1
%bitOff = shl i32 %byteOff, 3
%res = lshr i32 %src, %bitOff
store i32 %res, ptr %dst, align 1
ret void
}
; Left shift of an i32 loaded from unaligned memory; same structure as
; @lshr_4bytes but using shl (lowered to sllw/sll on RV64/RV32).
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_4bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lb a0, 3(a0)
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: srli a4, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_4bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 2(a0)
; RV32I-NEXT: lbu a5, 3(a0)
; RV32I-NEXT: lbu a0, 0(a0)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a5, a5, 24
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
; RV32I-NEXT: lbu a6, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: or a0, a4, a0
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1 ; unaligned 4-byte load
%byteOff = load i32, ptr %byteOff.ptr, align 1 ; shift amount, in bytes
%bitOff = shl i32 %byteOff, 3 ; bytes -> bits
%res = shl i32 %src, %bitOff
store i32 %res, ptr %dst, align 1 ; unaligned 4-byte store
ret void
}
; Arithmetic right shift of an i32 loaded from unaligned memory; same
; structure as @lshr_4bytes but using ashr (lowered to sraw/sra).
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_4bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lb a0, 3(a0)
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sraw a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: srli a4, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_4bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 2(a0)
; RV32I-NEXT: lbu a5, 3(a0)
; RV32I-NEXT: lbu a0, 0(a0)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a5, a5, 24
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
; RV32I-NEXT: lbu a6, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: or a0, a4, a0
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1 ; unaligned 4-byte load
%byteOff = load i32, ptr %byteOff.ptr, align 1 ; shift amount, in bytes
%bitOff = shl i32 %byteOff, 3 ; bytes -> bits
%res = ashr i32 %src, %bitOff
store i32 %res, ptr %dst, align 1 ; unaligned 4-byte store
ret void
}
; Logical right shift of an i64 loaded from unaligned memory. RV64 does a
; single srl; RV32 must split into two 32-bit halves and branch on whether
; the bit offset reaches the high word (.LBB3_2 is the <32-bit-shift path).
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_8bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t2, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t2, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t1, 0(a1)
; RV64I-NEXT: lbu t2, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a4, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a4, a4, 35
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a1, a4, a1
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: srli a1, a0, 48
; RV64I-NEXT: srli a3, a0, 56
; RV64I-NEXT: srli a4, a0, 32
; RV64I-NEXT: srli a5, a0, 40
; RV64I-NEXT: srli a6, a0, 16
; RV64I-NEXT: srli a7, a0, 24
; RV64I-NEXT: srli t0, a0, 8
; RV64I-NEXT: sb a4, 4(a2)
; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb t0, 1(a2)
; RV64I-NEXT: sb a6, 2(a2)
; RV64I-NEXT: sb a7, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_8bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: lbu a3, 5(a0)
; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a5, 7(a0)
; RV32I-NEXT: lbu a6, 4(a0)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a5, a5, 24
; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: lbu a5, 1(a1)
; RV32I-NEXT: lbu a6, 0(a1)
; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli a5, a5, 8
; RV32I-NEXT: or a6, a5, a6
; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a5, a4, a3
; RV32I-NEXT: or a4, a1, a6
; RV32I-NEXT: slli a4, a4, 3
; RV32I-NEXT: addi a3, a4, -32
; RV32I-NEXT: srl a1, a5, a4
; RV32I-NEXT: bltz a3, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: lbu a6, 1(a0)
; RV32I-NEXT: lbu a7, 0(a0)
; RV32I-NEXT: lbu t0, 2(a0)
; RV32I-NEXT: lbu a0, 3(a0)
; RV32I-NEXT: slli a6, a6, 8
; RV32I-NEXT: or a6, a6, a7
; RV32I-NEXT: slli a5, a5, 1
; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: not a7, a4
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: srl a0, a0, a4
; RV32I-NEXT: sll a4, a5, a7
; RV32I-NEXT: or a0, a0, a4
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: srai a3, a3, 31
; RV32I-NEXT: srli a4, a0, 16
; RV32I-NEXT: srli a5, a0, 24
; RV32I-NEXT: and a1, a3, a1
; RV32I-NEXT: srli a3, a1, 16
; RV32I-NEXT: srli a6, a1, 24
; RV32I-NEXT: srli a7, a1, 8
; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a7, 5(a2)
; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: sb a4, 2(a2)
; RV32I-NEXT: sb a5, 3(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1 ; unaligned 8-byte load
%byteOff = load i64, ptr %byteOff.ptr, align 1 ; shift amount, in bytes
%bitOff = shl i64 %byteOff, 3 ; bytes -> bits
%res = lshr i64 %src, %bitOff
store i64 %res, ptr %dst, align 1 ; unaligned 8-byte store
ret void
}
; Left shift of an i64 loaded from unaligned memory. RV64 does a single
; sll; RV32 splits into two 32-bit halves and branches on whether the bit
; offset reaches the high word (.LBB4_2 is the <32-bit-shift path).
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_8bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t2, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t2, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t1, 0(a1)
; RV64I-NEXT: lbu t2, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a4, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a4, a4, 35
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a1, a4, a1
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: srli a1, a0, 48
; RV64I-NEXT: srli a3, a0, 56
; RV64I-NEXT: srli a4, a0, 32
; RV64I-NEXT: srli a5, a0, 40
; RV64I-NEXT: srli a6, a0, 16
; RV64I-NEXT: srli a7, a0, 24
; RV64I-NEXT: srli t0, a0, 8
; RV64I-NEXT: sb a4, 4(a2)
; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb t0, 1(a2)
; RV64I-NEXT: sb a6, 2(a2)
; RV64I-NEXT: sb a7, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_8bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 2(a0)
; RV32I-NEXT: lbu a5, 3(a0)
; RV32I-NEXT: lbu a6, 0(a0)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a5, a5, 24
; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: lbu a5, 1(a1)
; RV32I-NEXT: lbu a6, 0(a1)
; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli a5, a5, 8
; RV32I-NEXT: or a6, a5, a6
; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a5, a4, a3
; RV32I-NEXT: or a4, a1, a6
; RV32I-NEXT: slli a4, a4, 3
; RV32I-NEXT: addi a3, a4, -32
; RV32I-NEXT: sll a1, a5, a4
; RV32I-NEXT: bltz a3, .LBB4_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: j .LBB4_3
; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: lbu a6, 5(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 6(a0)
; RV32I-NEXT: lbu a0, 7(a0)
; RV32I-NEXT: slli a6, a6, 8
; RV32I-NEXT: or a6, a6, a7
; RV32I-NEXT: srli a5, a5, 1
; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: not a7, a4
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: sll a0, a0, a4
; RV32I-NEXT: srl a4, a5, a7
; RV32I-NEXT: or a0, a0, a4
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: srai a3, a3, 31
; RV32I-NEXT: srli a4, a0, 16
; RV32I-NEXT: srli a5, a0, 24
; RV32I-NEXT: and a1, a3, a1
; RV32I-NEXT: srli a3, a1, 16
; RV32I-NEXT: srli a6, a1, 24
; RV32I-NEXT: srli a7, a1, 8
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb a7, 1(a2)
; RV32I-NEXT: sb a3, 2(a2)
; RV32I-NEXT: sb a6, 3(a2)
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb a4, 6(a2)
; RV32I-NEXT: sb a5, 7(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1 ; unaligned 8-byte load
%byteOff = load i64, ptr %byteOff.ptr, align 1 ; shift amount, in bytes
%bitOff = shl i64 %byteOff, 3 ; bytes -> bits
%res = shl i64 %src, %bitOff
store i64 %res, ptr %dst, align 1 ; unaligned 8-byte store
ret void
}
; Arithmetic right shift of an i64 loaded from unaligned memory. RV64 does
; a single sra; RV32 splits into halves and, on the >=32-bit path, fills
; the high word with the sign (srai a5, a5, 31). .LBB5_2 is the <32 path.
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_8bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t2, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t2, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t1, 0(a1)
; RV64I-NEXT: lbu t2, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a4, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a4, a4, 35
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a1, a4, a1
; RV64I-NEXT: sra a0, a0, a1
; RV64I-NEXT: srli a1, a0, 48
; RV64I-NEXT: srli a3, a0, 56
; RV64I-NEXT: srli a4, a0, 32
; RV64I-NEXT: srli a5, a0, 40
; RV64I-NEXT: srli a6, a0, 16
; RV64I-NEXT: srli a7, a0, 24
; RV64I-NEXT: srli t0, a0, 8
; RV64I-NEXT: sb a4, 4(a2)
; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb t0, 1(a2)
; RV64I-NEXT: sb a6, 2(a2)
; RV64I-NEXT: sb a7, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_8bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: lbu a3, 5(a0)
; RV32I-NEXT: lbu a4, 4(a0)
; RV32I-NEXT: lbu a5, 6(a0)
; RV32I-NEXT: lbu a6, 7(a0)
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: or a3, a3, a4
; RV32I-NEXT: lbu a4, 1(a1)
; RV32I-NEXT: lbu a7, 0(a1)
; RV32I-NEXT: lbu t0, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: or a7, a4, a7
; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: slli a4, a5, 16
; RV32I-NEXT: slli a5, a6, 24
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: or a4, a4, a3
; RV32I-NEXT: or a3, a1, a7
; RV32I-NEXT: slli a3, a3, 3
; RV32I-NEXT: addi a6, a3, -32
; RV32I-NEXT: sra a1, a4, a3
; RV32I-NEXT: bltz a6, .LBB5_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srai a5, a5, 31
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: j .LBB5_3
; RV32I-NEXT: .LBB5_2:
; RV32I-NEXT: lbu a5, 1(a0)
; RV32I-NEXT: lbu a6, 0(a0)
; RV32I-NEXT: lbu a7, 2(a0)
; RV32I-NEXT: lbu a0, 3(a0)
; RV32I-NEXT: slli a5, a5, 8
; RV32I-NEXT: or a5, a5, a6
; RV32I-NEXT: slli a4, a4, 1
; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a7
; RV32I-NEXT: not a6, a3
; RV32I-NEXT: or a0, a0, a5
; RV32I-NEXT: srl a0, a0, a3
; RV32I-NEXT: sll a3, a4, a6
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: .LBB5_3:
; RV32I-NEXT: srli a3, a1, 16
; RV32I-NEXT: srli a4, a1, 24
; RV32I-NEXT: srli a5, a1, 8
; RV32I-NEXT: srli a6, a0, 16
; RV32I-NEXT: srli a7, a0, 24
; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a5, 5(a2)
; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a4, 7(a2)
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: sb a6, 2(a2)
; RV32I-NEXT: sb a7, 3(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1 ; unaligned 8-byte load
%byteOff = load i64, ptr %byteOff.ptr, align 1 ; shift amount, in bytes
%bitOff = shl i64 %byteOff, 3 ; bytes -> bits
%res = ashr i64 %src, %bitOff
store i64 %res, ptr %dst, align 1 ; unaligned 8-byte store
ret void
}
; Logical right shift of an i128 loaded from unaligned memory. RV64 uses a
; two-register branch-based expansion; RV32 spills the value plus a zero
; pad to a 32-byte stack slot and indexes into it by the byte offset,
; shifting word-by-word (branch-free funnel-shift lowering).
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_16bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 8(a0)
; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 10(a0)
; RV64I-NEXT: lbu a6, 11(a0)
; RV64I-NEXT: lbu a7, 12(a0)
; RV64I-NEXT: lbu t0, 13(a0)
; RV64I-NEXT: lbu t1, 14(a0)
; RV64I-NEXT: lbu t2, 15(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t3, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t3, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t2, 0(a1)
; RV64I-NEXT: lbu t3, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t2
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, t1, a5
; RV64I-NEXT: or a5, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a6, a5, 35
; RV64I-NEXT: or a5, a4, a3
; RV64I-NEXT: or a4, a6, a1
; RV64I-NEXT: addi a3, a4, -64
; RV64I-NEXT: srl a1, a5, a4
; RV64I-NEXT: bltz a3, .LBB6_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: j .LBB6_3
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: lbu a6, 1(a0)
; RV64I-NEXT: lbu a7, 2(a0)
; RV64I-NEXT: lbu t0, 3(a0)
; RV64I-NEXT: lbu t1, 0(a0)
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, a6, t1
; RV64I-NEXT: or a7, t0, a7
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 4(a0)
; RV64I-NEXT: lbu t2, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, t2
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: not a7, a4
; RV64I-NEXT: slli a5, a5, 1
; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: srl a0, a0, a4
; RV64I-NEXT: sll a4, a5, a7
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: .LBB6_3:
; RV64I-NEXT: srai a3, a3, 63
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: srli a6, a0, 40
; RV64I-NEXT: srli a7, a0, 32
; RV64I-NEXT: srli t0, a0, 24
; RV64I-NEXT: srli t1, a0, 16
; RV64I-NEXT: and a1, a3, a1
; RV64I-NEXT: sb a7, 4(a2)
; RV64I-NEXT: sb a6, 5(a2)
; RV64I-NEXT: sb a5, 6(a2)
; RV64I-NEXT: sb a4, 7(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: srli a4, a1, 48
; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: srli a6, a1, 32
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t2, a1, 16
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a5, 13(a2)
; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
; RV64I-NEXT: srli a3, a1, 8
; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: sb a3, 9(a2)
; RV64I-NEXT: sb t2, 10(a2)
; RV64I-NEXT: sb a7, 11(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a1, 1(a2)
; RV64I-NEXT: sb t1, 2(a2)
; RV64I-NEXT: sb t0, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_16bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: lbu a3, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: lbu a5, 8(a0)
; RV32I-NEXT: lbu a6, 9(a0)
; RV32I-NEXT: lbu t3, 10(a0)
; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
; RV32I-NEXT: slli a6, a6, 8
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, t2, t1
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: lbu a6, 12(a0)
; RV32I-NEXT: lbu t1, 13(a0)
; RV32I-NEXT: lbu t2, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t4, t4, 24
; RV32I-NEXT: slli t1, t1, 8
; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or t3, t4, t3
; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: or a0, a0, t2
; RV32I-NEXT: lbu t1, 0(a1)
; RV32I-NEXT: lbu t2, 1(a1)
; RV32I-NEXT: lbu t4, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: or t1, t2, t1
; RV32I-NEXT: mv t2, sp
; RV32I-NEXT: slli t4, t4, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t4
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, t0, a7
; RV32I-NEXT: or a5, t3, a5
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: or a1, a1, t1
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: sw a4, 4(sp)
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: slli a0, a1, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: add a1, t2, a1
; RV32I-NEXT: andi a3, a0, 24
; RV32I-NEXT: xori a3, a3, 31
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a6, 0(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srl a7, a4, a0
; RV32I-NEXT: slli t0, a5, 1
; RV32I-NEXT: srl a6, a6, a0
; RV32I-NEXT: slli a4, a4, 1
; RV32I-NEXT: srl a5, a5, a0
; RV32I-NEXT: slli t1, a1, 1
; RV32I-NEXT: srl a0, a1, a0
; RV32I-NEXT: sll a1, t0, a3
; RV32I-NEXT: sll a4, a4, a3
; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
; RV32I-NEXT: or a1, a7, a1
; RV32I-NEXT: or a4, a6, a4
; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: srli t0, a3, 24
; RV32I-NEXT: srli a3, a3, 8
; RV32I-NEXT: srli t1, a4, 16
; RV32I-NEXT: srli t2, a4, 24
; RV32I-NEXT: srli a4, a4, 8
; RV32I-NEXT: srli t3, a1, 16
; RV32I-NEXT: srli t4, a1, 24
; RV32I-NEXT: srli a1, a1, 8
; RV32I-NEXT: sb a5, 8(a2)
; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
; RV32I-NEXT: sb t0, 11(a2)
; RV32I-NEXT: sb a6, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb t1, 2(a2)
; RV32I-NEXT: sb t2, 3(a2)
; RV32I-NEXT: sb a7, 4(a2)
; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t3, 6(a2)
; RV32I-NEXT: sb t4, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1 ; unaligned 16-byte load
%byteOff = load i128, ptr %byteOff.ptr, align 1 ; shift amount, in bytes
%bitOff = shl i128 %byteOff, 3 ; bytes -> bits
%res = lshr i128 %src, %bitOff
store i128 %res, ptr %dst, align 1 ; unaligned 16-byte store
ret void
}
; Like @lshr_16bytes but the offset is a count of 4-byte words (shl 5 ->
; bits). On RV32 the word-multiple shift becomes a pure stack copy: the
; value is spilled with zero padding and the result words are reloaded at
; the word offset, with no shift instructions needed.
; CHECK lines below are autogenerated by update_llc_test_checks.py.
define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_16bytes_wordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 8(a0)
; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 10(a0)
; RV64I-NEXT: lbu a6, 11(a0)
; RV64I-NEXT: lbu a7, 12(a0)
; RV64I-NEXT: lbu t0, 13(a0)
; RV64I-NEXT: lbu t1, 14(a0)
; RV64I-NEXT: lbu t2, 15(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t3, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t3, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t2, 0(a1)
; RV64I-NEXT: lbu t3, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t2
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, t1, a5
; RV64I-NEXT: or a5, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a1, a1, 5
; RV64I-NEXT: slli a6, a5, 37
; RV64I-NEXT: or a5, a4, a3
; RV64I-NEXT: or a4, a6, a1
; RV64I-NEXT: addi a3, a4, -64
; RV64I-NEXT: srl a1, a5, a4
; RV64I-NEXT: bltz a3, .LBB7_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: j .LBB7_3
; RV64I-NEXT: .LBB7_2:
; RV64I-NEXT: lbu a6, 1(a0)
; RV64I-NEXT: lbu a7, 2(a0)
; RV64I-NEXT: lbu t0, 3(a0)
; RV64I-NEXT: lbu t1, 0(a0)
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, a6, t1
; RV64I-NEXT: or a7, t0, a7
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 4(a0)
; RV64I-NEXT: lbu t2, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, t2
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: not a7, a4
; RV64I-NEXT: slli a5, a5, 1
; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: srl a0, a0, a4
; RV64I-NEXT: sll a4, a5, a7
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: .LBB7_3:
; RV64I-NEXT: srai a3, a3, 63
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: srli a6, a0, 40
; RV64I-NEXT: srli a7, a0, 32
; RV64I-NEXT: srli t0, a0, 24
; RV64I-NEXT: srli t1, a0, 16
; RV64I-NEXT: and a1, a3, a1
; RV64I-NEXT: sb a7, 4(a2)
; RV64I-NEXT: sb a6, 5(a2)
; RV64I-NEXT: sb a5, 6(a2)
; RV64I-NEXT: sb a4, 7(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: srli a4, a1, 48
; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: srli a6, a1, 32
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t2, a1, 16
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a5, 13(a2)
; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
; RV64I-NEXT: srli a3, a1, 8
; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: sb a3, 9(a2)
; RV64I-NEXT: sb t2, 10(a2)
; RV64I-NEXT: sb a7, 11(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a1, 1(a2)
; RV64I-NEXT: sb t1, 2(a2)
; RV64I-NEXT: sb t0, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_16bytes_wordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: lbu a3, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: lbu t3, 8(a0)
; RV32I-NEXT: lbu t4, 9(a0)
; RV32I-NEXT: lbu t5, 10(a0)
; RV32I-NEXT: lbu t6, 11(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: or a5, t0, a7
; RV32I-NEXT: lbu a6, 12(a0)
; RV32I-NEXT: lbu a7, 13(a0)
; RV32I-NEXT: lbu t0, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
; RV32I-NEXT: or t1, t2, t1
; RV32I-NEXT: mv t2, sp
; RV32I-NEXT: slli t4, t4, 8
; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: slli a7, a7, 8
; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: or t3, t4, t3
; RV32I-NEXT: or t4, t6, t5
; RV32I-NEXT: or a6, a7, a6
; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, t1, a5
; RV32I-NEXT: or a5, t4, t3
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: add a1, t2, a1
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: sw a4, 4(sp)
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: lw a0, 8(a1)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: srli a7, a0, 8
; RV32I-NEXT: srli t0, a1, 16
; RV32I-NEXT: srli t1, a1, 24
; RV32I-NEXT: srli t2, a1, 8
; RV32I-NEXT: srli t3, a4, 16
; RV32I-NEXT: srli t4, a4, 24
; RV32I-NEXT: srli t5, a4, 8
; RV32I-NEXT: srli t6, a3, 16
; RV32I-NEXT: sb a0, 8(a2)
; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a5, 10(a2)
; RV32I-NEXT: sb a6, 11(a2)
; RV32I-NEXT: srli a0, a3, 24
; RV32I-NEXT: sb a1, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a1, a3, 8
; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t5, 1(a2)
; RV32I-NEXT: sb t3, 2(a2)
; RV32I-NEXT: sb t4, 3(a2)
; RV32I-NEXT: sb a3, 4(a2)
; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t6, 6(a2)
; RV32I-NEXT: sb a0, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1 ; unaligned 16-byte load
%wordOff = load i128, ptr %wordOff.ptr, align 1 ; shift amount, in 4-byte words
%bitOff = shl i128 %wordOff, 5 ; words -> bits (x32)
%res = lshr i128 %src, %bitOff
store i128 %res, ptr %dst, align 1 ; unaligned 16-byte store
ret void
}
; shl_16bytes: shift an i128 left by a whole number of bytes
; (%bitOff = %byteOff * 8). All memory operands are align 1, so the
; lowering must assemble the value from byte loads and scatter the
; result with byte stores. CHECK lines below are auto-generated by
; utils/update_llc_test_checks.py — do not edit them by hand.
define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_16bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t3, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t3, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t2, 0(a1)
; RV64I-NEXT: lbu t3, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t2
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, t1, a5
; RV64I-NEXT: or a5, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a6, a5, 35
; RV64I-NEXT: or a5, a4, a3
; RV64I-NEXT: or a4, a6, a1
; RV64I-NEXT: addi a3, a4, -64
; RV64I-NEXT: sll a1, a5, a4
; RV64I-NEXT: bltz a3, .LBB8_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: j .LBB8_3
; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: lbu a6, 9(a0)
; RV64I-NEXT: lbu a7, 10(a0)
; RV64I-NEXT: lbu t0, 11(a0)
; RV64I-NEXT: lbu t1, 8(a0)
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, a6, t1
; RV64I-NEXT: or a7, t0, a7
; RV64I-NEXT: lbu t0, 13(a0)
; RV64I-NEXT: lbu t1, 12(a0)
; RV64I-NEXT: lbu t2, 14(a0)
; RV64I-NEXT: lbu a0, 15(a0)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, t2
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: not a7, a4
; RV64I-NEXT: srli a5, a5, 1
; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sll a0, a0, a4
; RV64I-NEXT: srl a4, a5, a7
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: .LBB8_3:
; RV64I-NEXT: srai a3, a3, 63
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: srli a6, a0, 40
; RV64I-NEXT: srli a7, a0, 32
; RV64I-NEXT: srli t0, a0, 24
; RV64I-NEXT: srli t1, a0, 16
; RV64I-NEXT: and a1, a3, a1
; RV64I-NEXT: sb a7, 12(a2)
; RV64I-NEXT: sb a6, 13(a2)
; RV64I-NEXT: sb a5, 14(a2)
; RV64I-NEXT: sb a4, 15(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: srli a4, a1, 48
; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: srli a6, a1, 32
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t2, a1, 16
; RV64I-NEXT: sb a6, 4(a2)
; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a4, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a3, a1, 8
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb a3, 1(a2)
; RV64I-NEXT: sb t2, 2(a2)
; RV64I-NEXT: sb a7, 3(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb t1, 10(a2)
; RV64I-NEXT: sb t0, 11(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_16bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: lbu a3, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: lbu a5, 8(a0)
; RV32I-NEXT: lbu a6, 9(a0)
; RV32I-NEXT: lbu t3, 10(a0)
; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
; RV32I-NEXT: slli a6, a6, 8
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, t2, t1
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: lbu a6, 12(a0)
; RV32I-NEXT: lbu t1, 13(a0)
; RV32I-NEXT: lbu t2, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t4, t4, 24
; RV32I-NEXT: slli t1, t1, 8
; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or t3, t4, t3
; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: or a0, a0, t2
; RV32I-NEXT: lbu t1, 0(a1)
; RV32I-NEXT: lbu t2, 1(a1)
; RV32I-NEXT: lbu t4, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: or t1, t2, t1
; RV32I-NEXT: addi t2, sp, 16
; RV32I-NEXT: slli t4, t4, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t4
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, t0, a7
; RV32I-NEXT: or a5, t3, a5
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: or a1, a1, t1
; RV32I-NEXT: sw a3, 16(sp)
; RV32I-NEXT: sw a4, 20(sp)
; RV32I-NEXT: sw a5, 24(sp)
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: slli a0, a1, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: sub a1, t2, a1
; RV32I-NEXT: andi a3, a0, 24
; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a5, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: xori a3, a3, 31
; RV32I-NEXT: sll a7, a5, a0
; RV32I-NEXT: srli t0, a4, 1
; RV32I-NEXT: sll a1, a1, a0
; RV32I-NEXT: srli t1, a6, 1
; RV32I-NEXT: sll a6, a6, a0
; RV32I-NEXT: srli a5, a5, 1
; RV32I-NEXT: sll a0, a4, a0
; RV32I-NEXT: srl a4, t0, a3
; RV32I-NEXT: srl t0, t1, a3
; RV32I-NEXT: srl a3, a5, a3
; RV32I-NEXT: srli a5, a6, 24
; RV32I-NEXT: srli t1, a1, 24
; RV32I-NEXT: srli t2, a0, 16
; RV32I-NEXT: srli t3, a0, 24
; RV32I-NEXT: srli t4, a0, 8
; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: srli a7, a7, 24
; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: sb t4, 1(a2)
; RV32I-NEXT: sb t2, 2(a2)
; RV32I-NEXT: sb t3, 3(a2)
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: srli a6, a3, 8
; RV32I-NEXT: srli t0, a1, 16
; RV32I-NEXT: srli t2, a1, 8
; RV32I-NEXT: srli t3, a4, 16
; RV32I-NEXT: srli t4, a4, 8
; RV32I-NEXT: sb a3, 8(a2)
; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
; RV32I-NEXT: sb a1, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: sb a4, 4(a2)
; RV32I-NEXT: sb t4, 5(a2)
; RV32I-NEXT: sb t3, 6(a2)
; RV32I-NEXT: sb a7, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
; IR under test: unaligned i128 load, byte-granular shift amount, shl,
; unaligned i128 store.
%src = load i128, ptr %src.ptr, align 1
%byteOff = load i128, ptr %byteOff.ptr, align 1
%bitOff = shl i128 %byteOff, 3
%res = shl i128 %src, %bitOff
store i128 %res, ptr %dst, align 1
ret void
}
; shl_16bytes_wordOff: same as shl_16bytes but the shift amount is a
; count of 32-bit words (%bitOff = %wordOff * 32), so the RV32I lowering
; can index whole stack words (andi a1, a1, 12) without a sub-word shift.
; CHECK lines are auto-generated by utils/update_llc_test_checks.py.
define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_16bytes_wordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t3, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t3, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t2, 0(a1)
; RV64I-NEXT: lbu t3, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t2
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, t1, a5
; RV64I-NEXT: or a5, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a1, a1, 5
; RV64I-NEXT: slli a6, a5, 37
; RV64I-NEXT: or a5, a4, a3
; RV64I-NEXT: or a4, a6, a1
; RV64I-NEXT: addi a3, a4, -64
; RV64I-NEXT: sll a1, a5, a4
; RV64I-NEXT: bltz a3, .LBB9_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: j .LBB9_3
; RV64I-NEXT: .LBB9_2:
; RV64I-NEXT: lbu a6, 9(a0)
; RV64I-NEXT: lbu a7, 10(a0)
; RV64I-NEXT: lbu t0, 11(a0)
; RV64I-NEXT: lbu t1, 8(a0)
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, a6, t1
; RV64I-NEXT: or a7, t0, a7
; RV64I-NEXT: lbu t0, 13(a0)
; RV64I-NEXT: lbu t1, 12(a0)
; RV64I-NEXT: lbu t2, 14(a0)
; RV64I-NEXT: lbu a0, 15(a0)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t1
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, t2
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: not a7, a4
; RV64I-NEXT: srli a5, a5, 1
; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sll a0, a0, a4
; RV64I-NEXT: srl a4, a5, a7
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: .LBB9_3:
; RV64I-NEXT: srai a3, a3, 63
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: srli a6, a0, 40
; RV64I-NEXT: srli a7, a0, 32
; RV64I-NEXT: srli t0, a0, 24
; RV64I-NEXT: srli t1, a0, 16
; RV64I-NEXT: and a1, a3, a1
; RV64I-NEXT: sb a7, 12(a2)
; RV64I-NEXT: sb a6, 13(a2)
; RV64I-NEXT: sb a5, 14(a2)
; RV64I-NEXT: sb a4, 15(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: srli a4, a1, 48
; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: srli a6, a1, 32
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t2, a1, 16
; RV64I-NEXT: sb a6, 4(a2)
; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a4, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a3, a1, 8
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb a3, 1(a2)
; RV64I-NEXT: sb t2, 2(a2)
; RV64I-NEXT: sb a7, 3(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb t1, 10(a2)
; RV64I-NEXT: sb t0, 11(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_16bytes_wordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: lbu a3, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: lbu t3, 8(a0)
; RV32I-NEXT: lbu t4, 9(a0)
; RV32I-NEXT: lbu t5, 10(a0)
; RV32I-NEXT: lbu t6, 11(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: or a5, t0, a7
; RV32I-NEXT: lbu a6, 12(a0)
; RV32I-NEXT: lbu a7, 13(a0)
; RV32I-NEXT: lbu t0, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
; RV32I-NEXT: or t1, t2, t1
; RV32I-NEXT: addi t2, sp, 16
; RV32I-NEXT: slli t4, t4, 8
; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: slli a7, a7, 8
; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: or t3, t4, t3
; RV32I-NEXT: or t4, t6, t5
; RV32I-NEXT: or a6, a7, a6
; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, t1, a5
; RV32I-NEXT: or a5, t4, t3
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: sub a1, t2, a1
; RV32I-NEXT: sw a3, 16(sp)
; RV32I-NEXT: sw a4, 20(sp)
; RV32I-NEXT: sw a5, 24(sp)
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: lw a0, 8(a1)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: srli a7, a0, 8
; RV32I-NEXT: srli t0, a1, 16
; RV32I-NEXT: srli t1, a1, 24
; RV32I-NEXT: srli t2, a1, 8
; RV32I-NEXT: srli t3, a4, 16
; RV32I-NEXT: srli t4, a4, 24
; RV32I-NEXT: srli t5, a4, 8
; RV32I-NEXT: srli t6, a3, 16
; RV32I-NEXT: sb a0, 8(a2)
; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a5, 10(a2)
; RV32I-NEXT: sb a6, 11(a2)
; RV32I-NEXT: srli a0, a3, 24
; RV32I-NEXT: sb a1, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a1, a3, 8
; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t5, 1(a2)
; RV32I-NEXT: sb t3, 2(a2)
; RV32I-NEXT: sb t4, 3(a2)
; RV32I-NEXT: sb a3, 4(a2)
; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t6, 6(a2)
; RV32I-NEXT: sb a0, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
; IR under test: unaligned i128 load, word-granular shift amount, shl,
; unaligned i128 store.
%src = load i128, ptr %src.ptr, align 1
%wordOff = load i128, ptr %wordOff.ptr, align 1
%bitOff = shl i128 %wordOff, 5
%res = shl i128 %src, %bitOff
store i128 %res, ptr %dst, align 1
ret void
}
; ashr_16bytes: arithmetic right shift of an i128 by a whole number of
; bytes (%bitOff = %byteOff * 8). Unlike lshr, the high part must be
; filled with the sign bit (see the sraiw/srai of the top byte in the
; checks below). All memory operands are align 1, forcing byte loads
; and stores. CHECK lines auto-generated by update_llc_test_checks.py.
define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_16bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 8(a0)
; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 10(a0)
; RV64I-NEXT: lbu a6, 11(a0)
; RV64I-NEXT: lbu a7, 12(a0)
; RV64I-NEXT: lbu t0, 13(a0)
; RV64I-NEXT: lbu t1, 14(a0)
; RV64I-NEXT: lbu t2, 15(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t3, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t3, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t2, 0(a1)
; RV64I-NEXT: lbu t3, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t2
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a5, t1, a5
; RV64I-NEXT: or a4, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a6, a5, 32
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a7, a4, 35
; RV64I-NEXT: or a4, a6, a3
; RV64I-NEXT: or a3, a7, a1
; RV64I-NEXT: addi a6, a3, -64
; RV64I-NEXT: sra a1, a4, a3
; RV64I-NEXT: bltz a6, .LBB10_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sraiw a3, a5, 31
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: mv a1, a3
; RV64I-NEXT: j .LBB10_3
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: lbu a5, 1(a0)
; RV64I-NEXT: lbu a6, 2(a0)
; RV64I-NEXT: lbu a7, 3(a0)
; RV64I-NEXT: lbu t0, 0(a0)
; RV64I-NEXT: slli a5, a5, 8
; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a7, a7, 24
; RV64I-NEXT: or a5, a5, t0
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: lbu a7, 5(a0)
; RV64I-NEXT: lbu t0, 4(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: or a7, a7, t0
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a5, a6, a5
; RV64I-NEXT: not a6, a3
; RV64I-NEXT: slli a4, a4, 1
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: srl a0, a0, a3
; RV64I-NEXT: sll a3, a4, a6
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: .LBB10_3:
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: srli a4, a1, 48
; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: srli a6, a1, 32
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t0, a1, 16
; RV64I-NEXT: srli t1, a1, 8
; RV64I-NEXT: srli t2, a0, 56
; RV64I-NEXT: srli t3, a0, 48
; RV64I-NEXT: srli t4, a0, 40
; RV64I-NEXT: srli t5, a0, 32
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a5, 13(a2)
; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: sb t1, 9(a2)
; RV64I-NEXT: sb t0, 10(a2)
; RV64I-NEXT: sb a7, 11(a2)
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: sb t5, 4(a2)
; RV64I-NEXT: sb t4, 5(a2)
; RV64I-NEXT: sb t3, 6(a2)
; RV64I-NEXT: sb t2, 7(a2)
; RV64I-NEXT: srli a4, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_16bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: lbu a3, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: lbu a4, 8(a0)
; RV32I-NEXT: lbu t3, 9(a0)
; RV32I-NEXT: lbu t4, 10(a0)
; RV32I-NEXT: lbu t5, 11(a0)
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a6, t0, a7
; RV32I-NEXT: or a7, t2, t1
; RV32I-NEXT: lbu t0, 12(a0)
; RV32I-NEXT: lbu t1, 13(a0)
; RV32I-NEXT: lbu t2, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
; RV32I-NEXT: slli t3, t3, 8
; RV32I-NEXT: slli t4, t4, 16
; RV32I-NEXT: slli t5, t5, 24
; RV32I-NEXT: slli t1, t1, 8
; RV32I-NEXT: or a4, t3, a4
; RV32I-NEXT: or t3, t5, t4
; RV32I-NEXT: or t0, t1, t0
; RV32I-NEXT: lbu t1, 1(a1)
; RV32I-NEXT: lbu t4, 0(a1)
; RV32I-NEXT: lbu t5, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli t1, t1, 8
; RV32I-NEXT: or t1, t1, t4
; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t5
; RV32I-NEXT: mv t4, sp
; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or t2, a0, t2
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: or a5, a7, a6
; RV32I-NEXT: or a4, t3, a4
; RV32I-NEXT: or a6, t2, t0
; RV32I-NEXT: or a1, a1, t1
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: sw a5, 4(sp)
; RV32I-NEXT: sw a4, 8(sp)
; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: slli a0, a1, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: add a1, t4, a1
; RV32I-NEXT: andi a3, a0, 24
; RV32I-NEXT: xori a3, a3, 31
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a6, 0(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srl a7, a4, a0
; RV32I-NEXT: slli t0, a5, 1
; RV32I-NEXT: srl a6, a6, a0
; RV32I-NEXT: slli a4, a4, 1
; RV32I-NEXT: srl a5, a5, a0
; RV32I-NEXT: slli t1, a1, 1
; RV32I-NEXT: sra a0, a1, a0
; RV32I-NEXT: sll a1, t0, a3
; RV32I-NEXT: sll a4, a4, a3
; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
; RV32I-NEXT: or a1, a7, a1
; RV32I-NEXT: or a4, a6, a4
; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: srli t0, a3, 24
; RV32I-NEXT: srli a3, a3, 8
; RV32I-NEXT: srli t1, a4, 16
; RV32I-NEXT: srli t2, a4, 24
; RV32I-NEXT: srli a4, a4, 8
; RV32I-NEXT: srli t3, a1, 16
; RV32I-NEXT: srli t4, a1, 24
; RV32I-NEXT: srli a1, a1, 8
; RV32I-NEXT: sb a5, 8(a2)
; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
; RV32I-NEXT: sb t0, 11(a2)
; RV32I-NEXT: sb a6, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb t1, 2(a2)
; RV32I-NEXT: sb t2, 3(a2)
; RV32I-NEXT: sb a7, 4(a2)
; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t3, 6(a2)
; RV32I-NEXT: sb t4, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
; IR under test: unaligned i128 load, byte-granular shift amount, ashr,
; unaligned i128 store.
%src = load i128, ptr %src.ptr, align 1
%byteOff = load i128, ptr %byteOff.ptr, align 1
%bitOff = shl i128 %byteOff, 3
%res = ashr i128 %src, %bitOff
store i128 %res, ptr %dst, align 1
ret void
}
; ashr_16bytes_wordOff: arithmetic right shift of an i128 by a count of
; 32-bit words (%bitOff = %wordOff * 32); the sign-extended fill words
; are spilled to the stack (sw a0, 16..28(sp)) and the result is read
; back at a word-indexed offset. All memory operands are align 1.
; CHECK lines auto-generated by utils/update_llc_test_checks.py.
define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_16bytes_wordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: lbu a3, 8(a0)
; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 10(a0)
; RV64I-NEXT: lbu a6, 11(a0)
; RV64I-NEXT: lbu a7, 12(a0)
; RV64I-NEXT: lbu t0, 13(a0)
; RV64I-NEXT: lbu t1, 14(a0)
; RV64I-NEXT: lbu t2, 15(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: lbu a6, 4(a1)
; RV64I-NEXT: lbu a7, 5(a1)
; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu t3, 7(a1)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: or a7, t3, t0
; RV64I-NEXT: lbu t0, 1(a1)
; RV64I-NEXT: lbu t2, 0(a1)
; RV64I-NEXT: lbu t3, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: or t0, t0, t2
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a5, t1, a5
; RV64I-NEXT: or a4, a7, a6
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a6, a5, 32
; RV64I-NEXT: slli a1, a1, 5
; RV64I-NEXT: slli a7, a4, 37
; RV64I-NEXT: or a4, a6, a3
; RV64I-NEXT: or a3, a7, a1
; RV64I-NEXT: addi a6, a3, -64
; RV64I-NEXT: sra a1, a4, a3
; RV64I-NEXT: bltz a6, .LBB11_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sraiw a3, a5, 31
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: mv a1, a3
; RV64I-NEXT: j .LBB11_3
; RV64I-NEXT: .LBB11_2:
; RV64I-NEXT: lbu a5, 1(a0)
; RV64I-NEXT: lbu a6, 2(a0)
; RV64I-NEXT: lbu a7, 3(a0)
; RV64I-NEXT: lbu t0, 0(a0)
; RV64I-NEXT: slli a5, a5, 8
; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a7, a7, 24
; RV64I-NEXT: or a5, a5, t0
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: lbu a7, 5(a0)
; RV64I-NEXT: lbu t0, 4(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: or a7, a7, t0
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a5, a6, a5
; RV64I-NEXT: not a6, a3
; RV64I-NEXT: slli a4, a4, 1
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: srl a0, a0, a3
; RV64I-NEXT: sll a3, a4, a6
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: .LBB11_3:
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: srli a4, a1, 48
; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: srli a6, a1, 32
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t0, a1, 16
; RV64I-NEXT: srli t1, a1, 8
; RV64I-NEXT: srli t2, a0, 56
; RV64I-NEXT: srli t3, a0, 48
; RV64I-NEXT: srli t4, a0, 40
; RV64I-NEXT: srli t5, a0, 32
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a5, 13(a2)
; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: sb t1, 9(a2)
; RV64I-NEXT: sb t0, 10(a2)
; RV64I-NEXT: sb a7, 11(a2)
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: sb t5, 4(a2)
; RV64I-NEXT: sb t4, 5(a2)
; RV64I-NEXT: sb t3, 6(a2)
; RV64I-NEXT: sb t2, 7(a2)
; RV64I-NEXT: srli a4, a0, 8
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_16bytes_wordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: lbu a3, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu a7, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: lbu t3, 8(a0)
; RV32I-NEXT: lbu t4, 9(a0)
; RV32I-NEXT: lbu t5, 10(a0)
; RV32I-NEXT: lbu t6, 11(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: or a5, t0, a7
; RV32I-NEXT: lbu a6, 12(a0)
; RV32I-NEXT: lbu a7, 13(a0)
; RV32I-NEXT: lbu t0, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
; RV32I-NEXT: or t1, t2, t1
; RV32I-NEXT: mv t2, sp
; RV32I-NEXT: slli t4, t4, 8
; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: slli a7, a7, 8
; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: or t3, t4, t3
; RV32I-NEXT: or t4, t6, t5
; RV32I-NEXT: or a6, a7, a6
; RV32I-NEXT: or a7, a0, t0
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: or a4, t1, a5
; RV32I-NEXT: or a5, t4, t3
; RV32I-NEXT: or a6, a7, a6
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: add a1, t2, a1
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: sw a4, 4(sp)
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: lw a0, 8(a1)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: srli a7, a0, 8
; RV32I-NEXT: srli t0, a1, 16
; RV32I-NEXT: srli t1, a1, 24
; RV32I-NEXT: srli t2, a1, 8
; RV32I-NEXT: srli t3, a4, 16
; RV32I-NEXT: srli t4, a4, 24
; RV32I-NEXT: srli t5, a4, 8
; RV32I-NEXT: srli t6, a3, 16
; RV32I-NEXT: sb a0, 8(a2)
; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a5, 10(a2)
; RV32I-NEXT: sb a6, 11(a2)
; RV32I-NEXT: srli a0, a3, 24
; RV32I-NEXT: sb a1, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a1, a3, 8
; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t5, 1(a2)
; RV32I-NEXT: sb t3, 2(a2)
; RV32I-NEXT: sb t4, 3(a2)
; RV32I-NEXT: sb a3, 4(a2)
; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t6, 6(a2)
; RV32I-NEXT: sb a0, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
; IR under test: unaligned i128 load, word-granular shift amount, ashr,
; unaligned i128 store.
%src = load i128, ptr %src.ptr, align 1
%wordOff = load i128, ptr %wordOff.ptr, align 1
%bitOff = shl i128 %wordOff, 5
%res = ashr i128 %src, %bitOff
store i128 %res, ptr %dst, align 1
ret void
}
define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_32bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: lbu t3, 8(a0)
; RV64I-NEXT: lbu t4, 9(a0)
; RV64I-NEXT: lbu t5, 10(a0)
; RV64I-NEXT: lbu t6, 11(a0)
; RV64I-NEXT: lbu s0, 12(a0)
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: lbu s8, 20(a0)
; RV64I-NEXT: lbu s9, 21(a0)
; RV64I-NEXT: lbu s10, 22(a0)
; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or t0, t6, t5
; RV64I-NEXT: or t1, s1, s0
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: lbu t6, 24(a0)
; RV64I-NEXT: lbu s0, 25(a0)
; RV64I-NEXT: lbu s1, 26(a0)
; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
; RV64I-NEXT: or t5, s9, s8
; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
; RV64I-NEXT: slli s10, s10, 16
; RV64I-NEXT: slli s11, s11, 24
; RV64I-NEXT: slli s0, s0, 8
; RV64I-NEXT: slli s1, s1, 16
; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or a0, s11, s10
; RV64I-NEXT: or t6, s0, t6
; RV64I-NEXT: or s0, s2, s1
; RV64I-NEXT: or s1, s4, s3
; RV64I-NEXT: lbu s2, 0(a1)
; RV64I-NEXT: lbu s3, 1(a1)
; RV64I-NEXT: lbu s4, 2(a1)
; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s3, s3, 8
; RV64I-NEXT: slli s4, s4, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
; RV64I-NEXT: or s3, s7, s4
; RV64I-NEXT: lbu s4, 5(a1)
; RV64I-NEXT: lbu s6, 4(a1)
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or s4, s4, s6
; RV64I-NEXT: sd zero, 32(sp)
; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: mv s6, sp
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: or a5, t1, t0
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
; RV64I-NEXT: sd a0, 16(sp)
; RV64I-NEXT: sd a5, 24(sp)
; RV64I-NEXT: slli a4, a1, 3
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: add a1, s6, a1
; RV64I-NEXT: andi a0, a4, 56
; RV64I-NEXT: xori a5, a0, 63
; RV64I-NEXT: ld a3, 8(a1)
; RV64I-NEXT: ld a6, 16(a1)
; RV64I-NEXT: ld a7, 0(a1)
; RV64I-NEXT: ld t0, 24(a1)
; RV64I-NEXT: srl a0, a3, a4
; RV64I-NEXT: slli t1, a6, 1
; RV64I-NEXT: srl a1, a7, a4
; RV64I-NEXT: slli a7, a3, 1
; RV64I-NEXT: srl a3, a6, a4
; RV64I-NEXT: slli a6, t0, 1
; RV64I-NEXT: srl t0, t0, a4
; RV64I-NEXT: sll a4, t1, a5
; RV64I-NEXT: sll a7, a7, a5
; RV64I-NEXT: sll a5, a6, a5
; RV64I-NEXT: srli a6, t0, 56
; RV64I-NEXT: srli t1, t0, 48
; RV64I-NEXT: srli t2, t0, 40
; RV64I-NEXT: srli t3, t0, 32
; RV64I-NEXT: srli t4, t0, 24
; RV64I-NEXT: srli t5, t0, 16
; RV64I-NEXT: srli t6, t0, 8
; RV64I-NEXT: or a4, a0, a4
; RV64I-NEXT: or a7, a1, a7
; RV64I-NEXT: or a5, a3, a5
; RV64I-NEXT: sb t3, 28(a2)
; RV64I-NEXT: sb t2, 29(a2)
; RV64I-NEXT: sb t1, 30(a2)
; RV64I-NEXT: sb a6, 31(a2)
; RV64I-NEXT: sb t0, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
; RV64I-NEXT: srli a6, a5, 56
; RV64I-NEXT: srli t0, a5, 48
; RV64I-NEXT: srli t1, a5, 40
; RV64I-NEXT: srli t2, a5, 32
; RV64I-NEXT: srli t3, a5, 24
; RV64I-NEXT: srli t4, a5, 16
; RV64I-NEXT: srli a5, a5, 8
; RV64I-NEXT: srli t5, a7, 56
; RV64I-NEXT: srli t6, a7, 48
; RV64I-NEXT: srli s0, a7, 40
; RV64I-NEXT: srli s1, a7, 32
; RV64I-NEXT: srli s2, a7, 24
; RV64I-NEXT: srli s3, a7, 16
; RV64I-NEXT: srli a7, a7, 8
; RV64I-NEXT: srli s4, a4, 56
; RV64I-NEXT: srli s5, a4, 48
; RV64I-NEXT: srli s6, a4, 40
; RV64I-NEXT: sb t2, 20(a2)
; RV64I-NEXT: sb t1, 21(a2)
; RV64I-NEXT: sb t0, 22(a2)
; RV64I-NEXT: sb a6, 23(a2)
; RV64I-NEXT: srli a6, a4, 32
; RV64I-NEXT: sb a3, 16(a2)
; RV64I-NEXT: sb a5, 17(a2)
; RV64I-NEXT: sb t4, 18(a2)
; RV64I-NEXT: sb t3, 19(a2)
; RV64I-NEXT: srli a3, a4, 24
; RV64I-NEXT: sb s1, 4(a2)
; RV64I-NEXT: sb s0, 5(a2)
; RV64I-NEXT: sb t6, 6(a2)
; RV64I-NEXT: sb t5, 7(a2)
; RV64I-NEXT: srli a5, a4, 16
; RV64I-NEXT: srli a4, a4, 8
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb a7, 1(a2)
; RV64I-NEXT: sb s3, 2(a2)
; RV64I-NEXT: sb s2, 3(a2)
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb s6, 13(a2)
; RV64I-NEXT: sb s5, 14(a2)
; RV64I-NEXT: sb s4, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a4, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_32bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu s1, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu t1, 4(a0)
; RV32I-NEXT: lbu t3, 5(a0)
; RV32I-NEXT: lbu t4, 6(a0)
; RV32I-NEXT: lbu s0, 7(a0)
; RV32I-NEXT: lbu t2, 8(a0)
; RV32I-NEXT: lbu s3, 9(a0)
; RV32I-NEXT: lbu s6, 10(a0)
; RV32I-NEXT: lbu s8, 11(a0)
; RV32I-NEXT: lbu s9, 12(a0)
; RV32I-NEXT: lbu s10, 13(a0)
; RV32I-NEXT: lbu s4, 14(a0)
; RV32I-NEXT: lbu s7, 15(a0)
; RV32I-NEXT: lbu s5, 16(a0)
; RV32I-NEXT: lbu s11, 17(a0)
; RV32I-NEXT: lbu ra, 18(a0)
; RV32I-NEXT: lbu a3, 19(a0)
; RV32I-NEXT: lbu t5, 20(a0)
; RV32I-NEXT: lbu t6, 21(a0)
; RV32I-NEXT: lbu a7, 22(a0)
; RV32I-NEXT: lbu t0, 23(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t3, t3, 8
; RV32I-NEXT: slli t4, t4, 16
; RV32I-NEXT: slli s0, s0, 24
; RV32I-NEXT: or a4, a4, s1
; RV32I-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: or a5, t3, t1
; RV32I-NEXT: or a6, s0, t4
; RV32I-NEXT: lbu t1, 24(a0)
; RV32I-NEXT: lbu s0, 25(a0)
; RV32I-NEXT: lbu s1, 26(a0)
; RV32I-NEXT: lbu s2, 27(a0)
; RV32I-NEXT: slli s3, s3, 8
; RV32I-NEXT: slli s6, s6, 16
; RV32I-NEXT: slli s8, s8, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: or t2, s3, t2
; RV32I-NEXT: or t3, s8, s6
; RV32I-NEXT: or t4, s10, s9
; RV32I-NEXT: lbu s3, 28(a0)
; RV32I-NEXT: lbu s6, 29(a0)
; RV32I-NEXT: lbu s8, 30(a0)
; RV32I-NEXT: lbu s9, 31(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s7, s7, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a3, a3, 24
; RV32I-NEXT: or a0, s7, s4
; RV32I-NEXT: or s4, s11, s5
; RV32I-NEXT: or s5, a3, ra
; RV32I-NEXT: lbu a3, 0(a1)
; RV32I-NEXT: lbu s7, 1(a1)
; RV32I-NEXT: lbu s10, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 64(sp)
; RV32I-NEXT: sw zero, 68(sp)
; RV32I-NEXT: sw zero, 40(sp)
; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw zero, 48(sp)
; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: slli t6, t6, 8
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: addi t6, sp, 8
; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t0, t0, 24
; RV32I-NEXT: slli s0, s0, 8
; RV32I-NEXT: slli s1, s1, 16
; RV32I-NEXT: slli s2, s2, 24
; RV32I-NEXT: slli s6, s6, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s10, s10, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s0, t1
; RV32I-NEXT: or t1, s2, s1
; RV32I-NEXT: or s0, s6, s3
; RV32I-NEXT: or s1, s9, s8
; RV32I-NEXT: or a3, s7, a3
; RV32I-NEXT: or a1, a1, s10
; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a4, a4, s2
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a6, t3, t2
; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or t2, s5, s4
; RV32I-NEXT: or a7, a7, t5
; RV32I-NEXT: or t0, t1, t0
; RV32I-NEXT: or s0, s1, s0
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: sw t2, 24(sp)
; RV32I-NEXT: sw a7, 28(sp)
; RV32I-NEXT: sw t0, 32(sp)
; RV32I-NEXT: sw s0, 36(sp)
; RV32I-NEXT: sw a4, 8(sp)
; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: sw a6, 16(sp)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: slli t1, a1, 3
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: add a1, t6, a1
; RV32I-NEXT: andi a0, t1, 24
; RV32I-NEXT: xori t0, a0, 31
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a6, 12(a1)
; RV32I-NEXT: lw a7, 16(a1)
; RV32I-NEXT: lw t2, 20(a1)
; RV32I-NEXT: lw t3, 24(a1)
; RV32I-NEXT: lw t4, 28(a1)
; RV32I-NEXT: srl a0, a4, t1
; RV32I-NEXT: slli t5, a5, 1
; RV32I-NEXT: srl a1, a3, t1
; RV32I-NEXT: slli t6, a4, 1
; RV32I-NEXT: srl a3, a6, t1
; RV32I-NEXT: slli s0, a7, 1
; RV32I-NEXT: srl a4, a5, t1
; RV32I-NEXT: slli s1, a6, 1
; RV32I-NEXT: srl a5, t2, t1
; RV32I-NEXT: slli s2, t3, 1
; RV32I-NEXT: srl a6, a7, t1
; RV32I-NEXT: slli t2, t2, 1
; RV32I-NEXT: srl a7, t3, t1
; RV32I-NEXT: slli t3, t4, 1
; RV32I-NEXT: srl t1, t4, t1
; RV32I-NEXT: sll t4, t5, t0
; RV32I-NEXT: sll t5, t6, t0
; RV32I-NEXT: sll t6, s0, t0
; RV32I-NEXT: sll s0, s1, t0
; RV32I-NEXT: sll s1, s2, t0
; RV32I-NEXT: sll t2, t2, t0
; RV32I-NEXT: sll t3, t3, t0
; RV32I-NEXT: srli s2, t1, 24
; RV32I-NEXT: srli s3, t1, 16
; RV32I-NEXT: srli s4, t1, 8
; RV32I-NEXT: or t0, a0, t4
; RV32I-NEXT: or t4, a1, t5
; RV32I-NEXT: or t5, a3, t6
; RV32I-NEXT: or s0, a4, s0
; RV32I-NEXT: or s1, a5, s1
; RV32I-NEXT: or t2, a6, t2
; RV32I-NEXT: or t3, a7, t3
; RV32I-NEXT: sb t1, 28(a2)
; RV32I-NEXT: sb s4, 29(a2)
; RV32I-NEXT: sb s3, 30(a2)
; RV32I-NEXT: sb s2, 31(a2)
; RV32I-NEXT: srli t1, t3, 24
; RV32I-NEXT: srli t6, t3, 16
; RV32I-NEXT: srli t3, t3, 8
; RV32I-NEXT: srli s2, t2, 24
; RV32I-NEXT: srli s3, t2, 16
; RV32I-NEXT: srli t2, t2, 8
; RV32I-NEXT: srli s4, s1, 24
; RV32I-NEXT: srli s5, s1, 16
; RV32I-NEXT: srli s1, s1, 8
; RV32I-NEXT: srli s6, s0, 24
; RV32I-NEXT: srli s7, s0, 16
; RV32I-NEXT: srli s0, s0, 8
; RV32I-NEXT: srli s8, t5, 24
; RV32I-NEXT: srli s9, t5, 16
; RV32I-NEXT: srli t5, t5, 8
; RV32I-NEXT: srli s10, t4, 24
; RV32I-NEXT: srli s11, t4, 16
; RV32I-NEXT: srli t4, t4, 8
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t6, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, t0, 24
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb t2, 17(a2)
; RV32I-NEXT: sb s3, 18(a2)
; RV32I-NEXT: sb s2, 19(a2)
; RV32I-NEXT: srli a6, t0, 16
; RV32I-NEXT: srli t0, t0, 8
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s1, 21(a2)
; RV32I-NEXT: sb s5, 22(a2)
; RV32I-NEXT: sb s4, 23(a2)
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s0, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb t5, 13(a2)
; RV32I-NEXT: sb s9, 14(a2)
; RV32I-NEXT: sb s8, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t4, 1(a2)
; RV32I-NEXT: sb s11, 2(a2)
; RV32I-NEXT: sb s10, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb t0, 5(a2)
; RV32I-NEXT: sb a6, 6(a2)
; RV32I-NEXT: sb a7, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
%byteOff = load i256, ptr %byteOff.ptr, align 1
%bitOff = shl i256 %byteOff, 3
%res = lshr i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}
define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; NOTE(review): the CHECK lines below are autogenerated (update_llc_test_checks.py
; style) and pin the exact RV64I/RV32I lowering byte-for-byte — do not hand-edit.
; Both lowerings use the same stack-buffer strategy visible in the checks:
; load the 32 source bytes individually, assemble them into native words,
; store them to the stack next to a zeroed 32-byte upper half, then index into
; that buffer by the (masked) word offset and shift/recombine the words.
; RV64I-LABEL: lshr_32bytes_wordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: lbu t3, 8(a0)
; RV64I-NEXT: lbu t4, 9(a0)
; RV64I-NEXT: lbu t5, 10(a0)
; RV64I-NEXT: lbu t6, 11(a0)
; RV64I-NEXT: lbu s0, 12(a0)
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: lbu s8, 20(a0)
; RV64I-NEXT: lbu s9, 21(a0)
; RV64I-NEXT: lbu s10, 22(a0)
; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or t0, t6, t5
; RV64I-NEXT: or t1, s1, s0
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: lbu t6, 24(a0)
; RV64I-NEXT: lbu s0, 25(a0)
; RV64I-NEXT: lbu s1, 26(a0)
; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
; RV64I-NEXT: or t5, s9, s8
; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
; RV64I-NEXT: slli s10, s10, 16
; RV64I-NEXT: slli s11, s11, 24
; RV64I-NEXT: slli s0, s0, 8
; RV64I-NEXT: slli s1, s1, 16
; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or a0, s11, s10
; RV64I-NEXT: or t6, s0, t6
; RV64I-NEXT: or s0, s2, s1
; RV64I-NEXT: or s1, s4, s3
; RV64I-NEXT: lbu s2, 0(a1)
; RV64I-NEXT: lbu s3, 1(a1)
; RV64I-NEXT: lbu s4, 2(a1)
; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s3, s3, 8
; RV64I-NEXT: slli s4, s4, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
; RV64I-NEXT: or s3, s7, s4
; RV64I-NEXT: lbu s4, 5(a1)
; RV64I-NEXT: lbu s6, 4(a1)
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or s4, s4, s6
; RV64I-NEXT: sd zero, 32(sp)
; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: mv s6, sp
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: or a5, t1, t0
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
; RV64I-NEXT: sd a0, 16(sp)
; RV64I-NEXT: sd a5, 24(sp)
; RV64I-NEXT: slli a3, a1, 5
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: andi a0, a3, 32
; RV64I-NEXT: add a1, s6, a1
; RV64I-NEXT: xori a4, a0, 63
; RV64I-NEXT: ld a5, 8(a1)
; RV64I-NEXT: ld a6, 16(a1)
; RV64I-NEXT: ld a7, 0(a1)
; RV64I-NEXT: ld t0, 24(a1)
; RV64I-NEXT: srl a0, a5, a3
; RV64I-NEXT: slli t1, a6, 1
; RV64I-NEXT: srl a1, a7, a3
; RV64I-NEXT: slli a5, a5, 1
; RV64I-NEXT: srl a6, a6, a3
; RV64I-NEXT: slli a7, t0, 1
; RV64I-NEXT: srl a3, t0, a3
; RV64I-NEXT: sll t0, t1, a4
; RV64I-NEXT: sll a5, a5, a4
; RV64I-NEXT: sll a4, a7, a4
; RV64I-NEXT: srli a7, a6, 24
; RV64I-NEXT: srli t1, a6, 16
; RV64I-NEXT: srli t2, a6, 8
; RV64I-NEXT: srli t3, a3, 56
; RV64I-NEXT: srli t4, a3, 48
; RV64I-NEXT: srli t5, a3, 40
; RV64I-NEXT: srli t6, a3, 32
; RV64I-NEXT: srli s0, a3, 24
; RV64I-NEXT: srli s1, a3, 16
; RV64I-NEXT: srli s2, a3, 8
; RV64I-NEXT: srli s3, a1, 24
; RV64I-NEXT: srli s4, a1, 16
; RV64I-NEXT: srli s5, a1, 8
; RV64I-NEXT: srli s6, a0, 24
; RV64I-NEXT: or a4, a6, a4
; RV64I-NEXT: sb a6, 16(a2)
; RV64I-NEXT: sb t2, 17(a2)
; RV64I-NEXT: sb t1, 18(a2)
; RV64I-NEXT: sb a7, 19(a2)
; RV64I-NEXT: srli a6, a0, 16
; RV64I-NEXT: sb t6, 28(a2)
; RV64I-NEXT: sb t5, 29(a2)
; RV64I-NEXT: sb t4, 30(a2)
; RV64I-NEXT: sb t3, 31(a2)
; RV64I-NEXT: srli a7, a0, 8
; RV64I-NEXT: or t0, a0, t0
; RV64I-NEXT: or a5, a1, a5
; RV64I-NEXT: sb a3, 24(a2)
; RV64I-NEXT: sb s2, 25(a2)
; RV64I-NEXT: sb s1, 26(a2)
; RV64I-NEXT: sb s0, 27(a2)
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s5, 1(a2)
; RV64I-NEXT: sb s4, 2(a2)
; RV64I-NEXT: sb s3, 3(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a7, 9(a2)
; RV64I-NEXT: sb a6, 10(a2)
; RV64I-NEXT: sb s6, 11(a2)
; RV64I-NEXT: srli a0, a4, 56
; RV64I-NEXT: srli a1, a4, 48
; RV64I-NEXT: srli a3, a4, 40
; RV64I-NEXT: srli a4, a4, 32
; RV64I-NEXT: srli a6, a5, 56
; RV64I-NEXT: srli a7, a5, 48
; RV64I-NEXT: srli t1, a5, 40
; RV64I-NEXT: srli a5, a5, 32
; RV64I-NEXT: srli t2, t0, 56
; RV64I-NEXT: srli t3, t0, 48
; RV64I-NEXT: srli t4, t0, 40
; RV64I-NEXT: srli t0, t0, 32
; RV64I-NEXT: sb a4, 20(a2)
; RV64I-NEXT: sb a3, 21(a2)
; RV64I-NEXT: sb a1, 22(a2)
; RV64I-NEXT: sb a0, 23(a2)
; RV64I-NEXT: sb a5, 4(a2)
; RV64I-NEXT: sb t1, 5(a2)
; RV64I-NEXT: sb a7, 6(a2)
; RV64I-NEXT: sb a6, 7(a2)
; RV64I-NEXT: sb t0, 12(a2)
; RV64I-NEXT: sb t4, 13(a2)
; RV64I-NEXT: sb t3, 14(a2)
; RV64I-NEXT: sb t2, 15(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_32bytes_wordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a7, 0(a0)
; RV32I-NEXT: lbu t0, 1(a0)
; RV32I-NEXT: lbu t1, 2(a0)
; RV32I-NEXT: lbu s1, 3(a0)
; RV32I-NEXT: lbu s7, 4(a0)
; RV32I-NEXT: lbu s8, 5(a0)
; RV32I-NEXT: lbu s4, 6(a0)
; RV32I-NEXT: lbu s6, 7(a0)
; RV32I-NEXT: lbu s5, 8(a0)
; RV32I-NEXT: lbu s10, 9(a0)
; RV32I-NEXT: lbu s11, 10(a0)
; RV32I-NEXT: lbu ra, 11(a0)
; RV32I-NEXT: lbu t4, 12(a0)
; RV32I-NEXT: lbu t6, 13(a0)
; RV32I-NEXT: lbu a5, 14(a0)
; RV32I-NEXT: lbu a6, 15(a0)
; RV32I-NEXT: lbu a3, 16(a0)
; RV32I-NEXT: lbu t2, 17(a0)
; RV32I-NEXT: lbu t3, 18(a0)
; RV32I-NEXT: lbu t5, 19(a0)
; RV32I-NEXT: lbu a4, 20(a0)
; RV32I-NEXT: lbu s0, 21(a0)
; RV32I-NEXT: lbu s2, 22(a0)
; RV32I-NEXT: lbu s3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli s1, s1, 24
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s1, t1
; RV32I-NEXT: or t1, s8, s7
; RV32I-NEXT: lbu s1, 24(a0)
; RV32I-NEXT: lbu s7, 25(a0)
; RV32I-NEXT: lbu s8, 26(a0)
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s6, s6, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: slli ra, ra, 24
; RV32I-NEXT: or s4, s6, s4
; RV32I-NEXT: or s5, s10, s5
; RV32I-NEXT: or s6, ra, s11
; RV32I-NEXT: lbu s10, 28(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu ra, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 64(sp)
; RV32I-NEXT: sw zero, 68(sp)
; RV32I-NEXT: sw zero, 40(sp)
; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw zero, 48(sp)
; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: slli t6, t6, 8
; RV32I-NEXT: or t4, t6, t4
; RV32I-NEXT: addi t6, sp, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t5, t5, 24
; RV32I-NEXT: slli s0, s0, 8
; RV32I-NEXT: slli s2, s2, 16
; RV32I-NEXT: slli s3, s3, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a3, t2, a3
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a4, s0, a4
; RV32I-NEXT: or t2, s3, s2
; RV32I-NEXT: or t3, s7, s1
; RV32I-NEXT: or t5, s9, s8
; RV32I-NEXT: or s0, s11, s10
; RV32I-NEXT: or a0, a0, ra
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s4, t1
; RV32I-NEXT: or t1, s6, s5
; RV32I-NEXT: or a5, a5, t4
; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: or a4, t2, a4
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a0, a0, s0
; RV32I-NEXT: add t6, t6, a1
; RV32I-NEXT: sw a3, 24(sp)
; RV32I-NEXT: sw a4, 28(sp)
; RV32I-NEXT: sw a6, 32(sp)
; RV32I-NEXT: sw a0, 36(sp)
; RV32I-NEXT: sw a7, 8(sp)
; RV32I-NEXT: sw t0, 12(sp)
; RV32I-NEXT: sw t1, 16(sp)
; RV32I-NEXT: sw a5, 20(sp)
; RV32I-NEXT: lw a1, 0(t6)
; RV32I-NEXT: lw a0, 4(t6)
; RV32I-NEXT: lw a4, 8(t6)
; RV32I-NEXT: lw a3, 12(t6)
; RV32I-NEXT: lw a7, 24(t6)
; RV32I-NEXT: lw a5, 20(t6)
; RV32I-NEXT: lw a6, 16(t6)
; RV32I-NEXT: lw t0, 28(t6)
; RV32I-NEXT: srli t1, a7, 24
; RV32I-NEXT: srli t2, a7, 16
; RV32I-NEXT: srli t3, a7, 8
; RV32I-NEXT: srli t4, t0, 24
; RV32I-NEXT: srli t5, t0, 16
; RV32I-NEXT: srli t6, t0, 8
; RV32I-NEXT: srli s0, a6, 24
; RV32I-NEXT: srli s1, a6, 16
; RV32I-NEXT: srli s2, a6, 8
; RV32I-NEXT: srli s3, a5, 24
; RV32I-NEXT: srli s4, a5, 16
; RV32I-NEXT: srli s5, a5, 8
; RV32I-NEXT: srli s6, a4, 24
; RV32I-NEXT: srli s7, a4, 16
; RV32I-NEXT: srli s8, a4, 8
; RV32I-NEXT: srli s9, a3, 24
; RV32I-NEXT: srli s10, a3, 16
; RV32I-NEXT: srli s11, a3, 8
; RV32I-NEXT: srli ra, a1, 24
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, a1, 16
; RV32I-NEXT: sb t0, 28(a2)
; RV32I-NEXT: sb t6, 29(a2)
; RV32I-NEXT: sb t5, 30(a2)
; RV32I-NEXT: sb t4, 31(a2)
; RV32I-NEXT: srli t0, a1, 8
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb s2, 17(a2)
; RV32I-NEXT: sb s1, 18(a2)
; RV32I-NEXT: sb s0, 19(a2)
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s5, 21(a2)
; RV32I-NEXT: sb s4, 22(a2)
; RV32I-NEXT: sb s3, 23(a2)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s8, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb s9, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t0, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb ra, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a5, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
; IR under test: the word offset is scaled by 32 (shl 5), so the i256 logical
; shift-right amount is always a multiple of 32 bits; everything is loaded and
; stored with align 1 to force the byte-wise lowering checked above.
%src = load i256, ptr %src.ptr, align 1
%wordOff = load i256, ptr %wordOff.ptr, align 1
%bitOff = shl i256 %wordOff, 5
%res = lshr i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}
define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_32bytes_dwordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a5, 0(a0)
; RV64I-NEXT: lbu a7, 1(a0)
; RV64I-NEXT: lbu t2, 2(a0)
; RV64I-NEXT: lbu s3, 3(a0)
; RV64I-NEXT: lbu t0, 4(a0)
; RV64I-NEXT: lbu s8, 5(a0)
; RV64I-NEXT: lbu s9, 6(a0)
; RV64I-NEXT: lbu s10, 7(a0)
; RV64I-NEXT: lbu s2, 8(a0)
; RV64I-NEXT: lbu s4, 9(a0)
; RV64I-NEXT: lbu s5, 10(a0)
; RV64I-NEXT: lbu s6, 11(a0)
; RV64I-NEXT: lbu s7, 12(a0)
; RV64I-NEXT: lbu s11, 13(a0)
; RV64I-NEXT: lbu t1, 14(a0)
; RV64I-NEXT: lbu t3, 15(a0)
; RV64I-NEXT: lbu a3, 16(a0)
; RV64I-NEXT: lbu a6, 17(a0)
; RV64I-NEXT: lbu t4, 18(a0)
; RV64I-NEXT: lbu t5, 19(a0)
; RV64I-NEXT: lbu a4, 20(a0)
; RV64I-NEXT: lbu t6, 21(a0)
; RV64I-NEXT: lbu s0, 22(a0)
; RV64I-NEXT: lbu s1, 23(a0)
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: slli s8, s8, 8
; RV64I-NEXT: slli s9, s9, 16
; RV64I-NEXT: slli s10, s10, 24
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, s3, t2
; RV64I-NEXT: or t0, s8, t0
; RV64I-NEXT: or t2, s10, s9
; RV64I-NEXT: lbu s3, 24(a0)
; RV64I-NEXT: lbu s8, 25(a0)
; RV64I-NEXT: lbu s9, 26(a0)
; RV64I-NEXT: lbu s10, 27(a0)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s11, s11, 8
; RV64I-NEXT: or s2, s4, s2
; RV64I-NEXT: or s4, s6, s5
; RV64I-NEXT: or s5, s11, s7
; RV64I-NEXT: lbu s6, 28(a0)
; RV64I-NEXT: lbu s7, 29(a0)
; RV64I-NEXT: lbu s11, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sd zero, 32(sp)
; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t3, t1
; RV64I-NEXT: mv t3, sp
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli t4, t4, 16
; RV64I-NEXT: slli t5, t5, 24
; RV64I-NEXT: slli t6, t6, 8
; RV64I-NEXT: slli s0, s0, 16
; RV64I-NEXT: slli s1, s1, 24
; RV64I-NEXT: slli s8, s8, 8
; RV64I-NEXT: slli s9, s9, 16
; RV64I-NEXT: slli s10, s10, 24
; RV64I-NEXT: slli s7, s7, 8
; RV64I-NEXT: slli s11, s11, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: or a6, t5, t4
; RV64I-NEXT: or a4, t6, a4
; RV64I-NEXT: or s0, s1, s0
; RV64I-NEXT: or t4, s8, s3
; RV64I-NEXT: or t5, s10, s9
; RV64I-NEXT: or t6, s7, s6
; RV64I-NEXT: or a0, a0, s11
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, t2, t0
; RV64I-NEXT: or t0, s4, s2
; RV64I-NEXT: or t1, t1, s5
; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: or a4, s0, a4
; RV64I-NEXT: or a6, t5, t4
; RV64I-NEXT: or a0, a0, t6
; RV64I-NEXT: add t3, t3, a1
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a1, a7, a5
; RV64I-NEXT: or a5, t1, t0
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sd a1, 0(sp)
; RV64I-NEXT: sd a5, 8(sp)
; RV64I-NEXT: sd a3, 16(sp)
; RV64I-NEXT: sd a0, 24(sp)
; RV64I-NEXT: ld a4, 16(t3)
; RV64I-NEXT: ld a0, 8(t3)
; RV64I-NEXT: ld a1, 0(t3)
; RV64I-NEXT: ld a3, 24(t3)
; RV64I-NEXT: srli a5, a4, 56
; RV64I-NEXT: srli a6, a4, 48
; RV64I-NEXT: srli a7, a4, 40
; RV64I-NEXT: srli t0, a4, 32
; RV64I-NEXT: srli t1, a4, 24
; RV64I-NEXT: srli t2, a4, 16
; RV64I-NEXT: srli t3, a4, 8
; RV64I-NEXT: srli t4, a3, 56
; RV64I-NEXT: srli t5, a3, 48
; RV64I-NEXT: srli t6, a3, 40
; RV64I-NEXT: srli s0, a3, 32
; RV64I-NEXT: srli s1, a3, 24
; RV64I-NEXT: srli s2, a3, 16
; RV64I-NEXT: srli s3, a3, 8
; RV64I-NEXT: srli s4, a1, 56
; RV64I-NEXT: srli s5, a1, 48
; RV64I-NEXT: srli s6, a1, 40
; RV64I-NEXT: srli s7, a1, 32
; RV64I-NEXT: srli s8, a1, 24
; RV64I-NEXT: srli s9, a1, 16
; RV64I-NEXT: srli s10, a1, 8
; RV64I-NEXT: srli s11, a0, 56
; RV64I-NEXT: sb t0, 20(a2)
; RV64I-NEXT: sb a7, 21(a2)
; RV64I-NEXT: sb a6, 22(a2)
; RV64I-NEXT: sb a5, 23(a2)
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: sb a4, 16(a2)
; RV64I-NEXT: sb t3, 17(a2)
; RV64I-NEXT: sb t2, 18(a2)
; RV64I-NEXT: sb t1, 19(a2)
; RV64I-NEXT: srli a4, a0, 40
; RV64I-NEXT: sb s0, 28(a2)
; RV64I-NEXT: sb t6, 29(a2)
; RV64I-NEXT: sb t5, 30(a2)
; RV64I-NEXT: sb t4, 31(a2)
; RV64I-NEXT: srli a6, a0, 32
; RV64I-NEXT: sb a3, 24(a2)
; RV64I-NEXT: sb s3, 25(a2)
; RV64I-NEXT: sb s2, 26(a2)
; RV64I-NEXT: sb s1, 27(a2)
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: sb s7, 4(a2)
; RV64I-NEXT: sb s6, 5(a2)
; RV64I-NEXT: sb s5, 6(a2)
; RV64I-NEXT: sb s4, 7(a2)
; RV64I-NEXT: srli a7, a0, 16
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s10, 1(a2)
; RV64I-NEXT: sb s9, 2(a2)
; RV64I-NEXT: sb s8, 3(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a4, 13(a2)
; RV64I-NEXT: sb a5, 14(a2)
; RV64I-NEXT: sb s11, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a7, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_32bytes_dwordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a7, 0(a0)
; RV32I-NEXT: lbu t0, 1(a0)
; RV32I-NEXT: lbu t1, 2(a0)
; RV32I-NEXT: lbu s1, 3(a0)
; RV32I-NEXT: lbu s7, 4(a0)
; RV32I-NEXT: lbu s8, 5(a0)
; RV32I-NEXT: lbu s4, 6(a0)
; RV32I-NEXT: lbu s6, 7(a0)
; RV32I-NEXT: lbu s5, 8(a0)
; RV32I-NEXT: lbu s10, 9(a0)
; RV32I-NEXT: lbu s11, 10(a0)
; RV32I-NEXT: lbu ra, 11(a0)
; RV32I-NEXT: lbu t4, 12(a0)
; RV32I-NEXT: lbu t6, 13(a0)
; RV32I-NEXT: lbu a5, 14(a0)
; RV32I-NEXT: lbu a6, 15(a0)
; RV32I-NEXT: lbu a3, 16(a0)
; RV32I-NEXT: lbu t2, 17(a0)
; RV32I-NEXT: lbu t3, 18(a0)
; RV32I-NEXT: lbu t5, 19(a0)
; RV32I-NEXT: lbu a4, 20(a0)
; RV32I-NEXT: lbu s0, 21(a0)
; RV32I-NEXT: lbu s2, 22(a0)
; RV32I-NEXT: lbu s3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli s1, s1, 24
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s1, t1
; RV32I-NEXT: or t1, s8, s7
; RV32I-NEXT: lbu s1, 24(a0)
; RV32I-NEXT: lbu s7, 25(a0)
; RV32I-NEXT: lbu s8, 26(a0)
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s6, s6, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: slli ra, ra, 24
; RV32I-NEXT: or s4, s6, s4
; RV32I-NEXT: or s5, s10, s5
; RV32I-NEXT: or s6, ra, s11
; RV32I-NEXT: lbu s10, 28(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu ra, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 64(sp)
; RV32I-NEXT: sw zero, 68(sp)
; RV32I-NEXT: sw zero, 40(sp)
; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw zero, 48(sp)
; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: slli t6, t6, 8
; RV32I-NEXT: or t4, t6, t4
; RV32I-NEXT: addi t6, sp, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t5, t5, 24
; RV32I-NEXT: slli s0, s0, 8
; RV32I-NEXT: slli s2, s2, 16
; RV32I-NEXT: slli s3, s3, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a3, t2, a3
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a4, s0, a4
; RV32I-NEXT: or t2, s3, s2
; RV32I-NEXT: or t3, s7, s1
; RV32I-NEXT: or t5, s9, s8
; RV32I-NEXT: or s0, s11, s10
; RV32I-NEXT: or a0, a0, ra
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s4, t1
; RV32I-NEXT: or t1, s6, s5
; RV32I-NEXT: or a5, a5, t4
; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: or a4, t2, a4
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a0, a0, s0
; RV32I-NEXT: add t6, t6, a1
; RV32I-NEXT: sw a3, 24(sp)
; RV32I-NEXT: sw a4, 28(sp)
; RV32I-NEXT: sw a6, 32(sp)
; RV32I-NEXT: sw a0, 36(sp)
; RV32I-NEXT: sw a7, 8(sp)
; RV32I-NEXT: sw t0, 12(sp)
; RV32I-NEXT: sw t1, 16(sp)
; RV32I-NEXT: sw a5, 20(sp)
; RV32I-NEXT: lw a1, 0(t6)
; RV32I-NEXT: lw a0, 4(t6)
; RV32I-NEXT: lw a4, 8(t6)
; RV32I-NEXT: lw a3, 12(t6)
; RV32I-NEXT: lw a7, 24(t6)
; RV32I-NEXT: lw a5, 20(t6)
; RV32I-NEXT: lw a6, 16(t6)
; RV32I-NEXT: lw t0, 28(t6)
; RV32I-NEXT: srli t1, a7, 24
; RV32I-NEXT: srli t2, a7, 16
; RV32I-NEXT: srli t3, a7, 8
; RV32I-NEXT: srli t4, t0, 24
; RV32I-NEXT: srli t5, t0, 16
; RV32I-NEXT: srli t6, t0, 8
; RV32I-NEXT: srli s0, a6, 24
; RV32I-NEXT: srli s1, a6, 16
; RV32I-NEXT: srli s2, a6, 8
; RV32I-NEXT: srli s3, a5, 24
; RV32I-NEXT: srli s4, a5, 16
; RV32I-NEXT: srli s5, a5, 8
; RV32I-NEXT: srli s6, a4, 24
; RV32I-NEXT: srli s7, a4, 16
; RV32I-NEXT: srli s8, a4, 8
; RV32I-NEXT: srli s9, a3, 24
; RV32I-NEXT: srli s10, a3, 16
; RV32I-NEXT: srli s11, a3, 8
; RV32I-NEXT: srli ra, a1, 24
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, a1, 16
; RV32I-NEXT: sb t0, 28(a2)
; RV32I-NEXT: sb t6, 29(a2)
; RV32I-NEXT: sb t5, 30(a2)
; RV32I-NEXT: sb t4, 31(a2)
; RV32I-NEXT: srli t0, a1, 8
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb s2, 17(a2)
; RV32I-NEXT: sb s1, 18(a2)
; RV32I-NEXT: sb s0, 19(a2)
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s5, 21(a2)
; RV32I-NEXT: sb s4, 22(a2)
; RV32I-NEXT: sb s3, 23(a2)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s8, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb s9, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t0, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb ra, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a5, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
%bitOff = shl i256 %dwordOff, 6
%res = lshr i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}
; Test: left-shift of an i256 loaded byte-by-byte (align 1), where the shift
; amount is a byte offset loaded from memory and scaled to bits (shl by 3).
; The CHECK lines below are autogenerated expected codegen (update_llc_test_checks.py);
; do not edit them by hand.
define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_32bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: lbu t3, 8(a0)
; RV64I-NEXT: lbu t4, 9(a0)
; RV64I-NEXT: lbu t5, 10(a0)
; RV64I-NEXT: lbu t6, 11(a0)
; RV64I-NEXT: lbu s0, 12(a0)
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: lbu s8, 20(a0)
; RV64I-NEXT: lbu s9, 21(a0)
; RV64I-NEXT: lbu s10, 22(a0)
; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or t0, t6, t5
; RV64I-NEXT: or t1, s1, s0
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: lbu t6, 24(a0)
; RV64I-NEXT: lbu s0, 25(a0)
; RV64I-NEXT: lbu s1, 26(a0)
; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
; RV64I-NEXT: or t5, s9, s8
; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
; RV64I-NEXT: slli s10, s10, 16
; RV64I-NEXT: slli s11, s11, 24
; RV64I-NEXT: slli s0, s0, 8
; RV64I-NEXT: slli s1, s1, 16
; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or a0, s11, s10
; RV64I-NEXT: or t6, s0, t6
; RV64I-NEXT: or s0, s2, s1
; RV64I-NEXT: or s1, s4, s3
; RV64I-NEXT: lbu s2, 0(a1)
; RV64I-NEXT: lbu s3, 1(a1)
; RV64I-NEXT: lbu s4, 2(a1)
; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s3, s3, 8
; RV64I-NEXT: slli s4, s4, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
; RV64I-NEXT: or s3, s7, s4
; RV64I-NEXT: lbu s4, 5(a1)
; RV64I-NEXT: lbu s6, 4(a1)
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or s4, s4, s6
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: addi s6, sp, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: or a5, t1, t0
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd a3, 32(sp)
; RV64I-NEXT: sd a4, 40(sp)
; RV64I-NEXT: sd a0, 48(sp)
; RV64I-NEXT: sd a5, 56(sp)
; RV64I-NEXT: slli a0, a1, 3
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: sub a1, s6, a1
; RV64I-NEXT: andi a3, a0, 56
; RV64I-NEXT: ld a4, 0(a1)
; RV64I-NEXT: ld a5, 8(a1)
; RV64I-NEXT: ld a6, 16(a1)
; RV64I-NEXT: ld a1, 24(a1)
; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: sll a7, a5, a0
; RV64I-NEXT: srli t0, a4, 1
; RV64I-NEXT: sll t1, a1, a0
; RV64I-NEXT: srli a1, a6, 1
; RV64I-NEXT: sll t2, a6, a0
; RV64I-NEXT: srli a5, a5, 1
; RV64I-NEXT: sll t3, a4, a0
; RV64I-NEXT: srl a0, t0, a3
; RV64I-NEXT: srl a4, a1, a3
; RV64I-NEXT: srl a5, a5, a3
; RV64I-NEXT: srli a3, t2, 56
; RV64I-NEXT: srli a1, t1, 56
; RV64I-NEXT: srli t0, t3, 56
; RV64I-NEXT: srli t4, t3, 48
; RV64I-NEXT: srli t5, t3, 40
; RV64I-NEXT: srli t6, t3, 32
; RV64I-NEXT: srli s0, t3, 24
; RV64I-NEXT: srli s1, t3, 16
; RV64I-NEXT: srli s2, t3, 8
; RV64I-NEXT: srli a6, a7, 56
; RV64I-NEXT: or a0, a7, a0
; RV64I-NEXT: or a4, t1, a4
; RV64I-NEXT: or a5, t2, a5
; RV64I-NEXT: sb t6, 4(a2)
; RV64I-NEXT: sb t5, 5(a2)
; RV64I-NEXT: sb t4, 6(a2)
; RV64I-NEXT: sb t0, 7(a2)
; RV64I-NEXT: sb t3, 0(a2)
; RV64I-NEXT: sb s2, 1(a2)
; RV64I-NEXT: sb s1, 2(a2)
; RV64I-NEXT: sb s0, 3(a2)
; RV64I-NEXT: srli a7, a5, 48
; RV64I-NEXT: srli t0, a5, 40
; RV64I-NEXT: srli t1, a5, 32
; RV64I-NEXT: srli t2, a5, 24
; RV64I-NEXT: srli t3, a5, 16
; RV64I-NEXT: srli t4, a5, 8
; RV64I-NEXT: srli t5, a4, 48
; RV64I-NEXT: srli t6, a4, 40
; RV64I-NEXT: srli s0, a4, 32
; RV64I-NEXT: srli s1, a4, 24
; RV64I-NEXT: srli s2, a4, 16
; RV64I-NEXT: srli s3, a4, 8
; RV64I-NEXT: srli s4, a0, 48
; RV64I-NEXT: srli s5, a0, 40
; RV64I-NEXT: srli s6, a0, 32
; RV64I-NEXT: sb t1, 20(a2)
; RV64I-NEXT: sb t0, 21(a2)
; RV64I-NEXT: sb a7, 22(a2)
; RV64I-NEXT: sb a3, 23(a2)
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: sb s0, 28(a2)
; RV64I-NEXT: sb t6, 29(a2)
; RV64I-NEXT: sb t5, 30(a2)
; RV64I-NEXT: sb a1, 31(a2)
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: sb s6, 12(a2)
; RV64I-NEXT: sb s5, 13(a2)
; RV64I-NEXT: sb s4, 14(a2)
; RV64I-NEXT: sb a6, 15(a2)
; RV64I-NEXT: srli a6, a0, 8
; RV64I-NEXT: sb a5, 16(a2)
; RV64I-NEXT: sb t4, 17(a2)
; RV64I-NEXT: sb t3, 18(a2)
; RV64I-NEXT: sb t2, 19(a2)
; RV64I-NEXT: sb a4, 24(a2)
; RV64I-NEXT: sb s3, 25(a2)
; RV64I-NEXT: sb s2, 26(a2)
; RV64I-NEXT: sb s1, 27(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a6, 9(a2)
; RV64I-NEXT: sb a1, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_32bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu s1, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu t1, 4(a0)
; RV32I-NEXT: lbu t3, 5(a0)
; RV32I-NEXT: lbu t4, 6(a0)
; RV32I-NEXT: lbu s0, 7(a0)
; RV32I-NEXT: lbu t2, 8(a0)
; RV32I-NEXT: lbu s3, 9(a0)
; RV32I-NEXT: lbu s6, 10(a0)
; RV32I-NEXT: lbu s8, 11(a0)
; RV32I-NEXT: lbu s9, 12(a0)
; RV32I-NEXT: lbu s10, 13(a0)
; RV32I-NEXT: lbu s4, 14(a0)
; RV32I-NEXT: lbu s7, 15(a0)
; RV32I-NEXT: lbu s5, 16(a0)
; RV32I-NEXT: lbu s11, 17(a0)
; RV32I-NEXT: lbu ra, 18(a0)
; RV32I-NEXT: lbu a3, 19(a0)
; RV32I-NEXT: lbu t5, 20(a0)
; RV32I-NEXT: lbu t6, 21(a0)
; RV32I-NEXT: lbu a7, 22(a0)
; RV32I-NEXT: lbu t0, 23(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t3, t3, 8
; RV32I-NEXT: slli t4, t4, 16
; RV32I-NEXT: slli s0, s0, 24
; RV32I-NEXT: or a4, a4, s1
; RV32I-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: or a5, t3, t1
; RV32I-NEXT: or a6, s0, t4
; RV32I-NEXT: lbu t1, 24(a0)
; RV32I-NEXT: lbu s0, 25(a0)
; RV32I-NEXT: lbu s1, 26(a0)
; RV32I-NEXT: lbu s2, 27(a0)
; RV32I-NEXT: slli s3, s3, 8
; RV32I-NEXT: slli s6, s6, 16
; RV32I-NEXT: slli s8, s8, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: or t2, s3, t2
; RV32I-NEXT: or t3, s8, s6
; RV32I-NEXT: or t4, s10, s9
; RV32I-NEXT: lbu s3, 28(a0)
; RV32I-NEXT: lbu s6, 29(a0)
; RV32I-NEXT: lbu s8, 30(a0)
; RV32I-NEXT: lbu s9, 31(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s7, s7, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a3, a3, 24
; RV32I-NEXT: or a0, s7, s4
; RV32I-NEXT: or s4, s11, s5
; RV32I-NEXT: or s5, a3, ra
; RV32I-NEXT: lbu a3, 0(a1)
; RV32I-NEXT: lbu s7, 1(a1)
; RV32I-NEXT: lbu s10, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 32(sp)
; RV32I-NEXT: sw zero, 36(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: slli t6, t6, 8
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: addi t6, sp, 40
; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t0, t0, 24
; RV32I-NEXT: slli s0, s0, 8
; RV32I-NEXT: slli s1, s1, 16
; RV32I-NEXT: slli s2, s2, 24
; RV32I-NEXT: slli s6, s6, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s10, s10, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s0, t1
; RV32I-NEXT: or t1, s2, s1
; RV32I-NEXT: or s0, s6, s3
; RV32I-NEXT: or s1, s9, s8
; RV32I-NEXT: or a3, s7, a3
; RV32I-NEXT: or a1, a1, s10
; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a4, a4, s2
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a6, t3, t2
; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or t2, s5, s4
; RV32I-NEXT: or a7, a7, t5
; RV32I-NEXT: or t0, t1, t0
; RV32I-NEXT: or s0, s1, s0
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: sw t2, 56(sp)
; RV32I-NEXT: sw a7, 60(sp)
; RV32I-NEXT: sw t0, 64(sp)
; RV32I-NEXT: sw s0, 68(sp)
; RV32I-NEXT: sw a4, 40(sp)
; RV32I-NEXT: sw a5, 44(sp)
; RV32I-NEXT: sw a6, 48(sp)
; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: slli a3, a1, 3
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: sub a1, t6, a1
; RV32I-NEXT: andi a0, a3, 24
; RV32I-NEXT: xori a0, a0, 31
; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a5, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw a7, 12(a1)
; RV32I-NEXT: lw t0, 16(a1)
; RV32I-NEXT: lw t1, 20(a1)
; RV32I-NEXT: lw t2, 24(a1)
; RV32I-NEXT: lw a1, 28(a1)
; RV32I-NEXT: sll t3, a5, a3
; RV32I-NEXT: srli t4, a4, 1
; RV32I-NEXT: sll t5, a7, a3
; RV32I-NEXT: srli t6, a6, 1
; RV32I-NEXT: sll s0, a6, a3
; RV32I-NEXT: srli a5, a5, 1
; RV32I-NEXT: sll s1, t1, a3
; RV32I-NEXT: srli a6, t0, 1
; RV32I-NEXT: sll s2, t0, a3
; RV32I-NEXT: srli a7, a7, 1
; RV32I-NEXT: sll s3, a1, a3
; RV32I-NEXT: srli a1, t2, 1
; RV32I-NEXT: sll s4, t2, a3
; RV32I-NEXT: srli t0, t1, 1
; RV32I-NEXT: sll s5, a4, a3
; RV32I-NEXT: srl t2, t4, a0
; RV32I-NEXT: srl t4, t6, a0
; RV32I-NEXT: srl t6, a5, a0
; RV32I-NEXT: srl s6, a6, a0
; RV32I-NEXT: srl s7, a7, a0
; RV32I-NEXT: srl s8, a1, a0
; RV32I-NEXT: srl s9, t0, a0
; RV32I-NEXT: srli t1, s4, 24
; RV32I-NEXT: srli a7, s3, 24
; RV32I-NEXT: srli a5, s2, 24
; RV32I-NEXT: srli a3, s1, 24
; RV32I-NEXT: srli a1, s0, 24
; RV32I-NEXT: srli a0, t5, 24
; RV32I-NEXT: srli s10, s5, 24
; RV32I-NEXT: srli s11, s5, 16
; RV32I-NEXT: srli ra, s5, 8
; RV32I-NEXT: srli a4, t3, 24
; RV32I-NEXT: or a6, t3, t2
; RV32I-NEXT: or t0, t5, t4
; RV32I-NEXT: or t2, s0, t6
; RV32I-NEXT: or t3, s1, s6
; RV32I-NEXT: or t4, s2, s7
; RV32I-NEXT: or t5, s3, s8
; RV32I-NEXT: or t6, s4, s9
; RV32I-NEXT: sb s5, 0(a2)
; RV32I-NEXT: sb ra, 1(a2)
; RV32I-NEXT: sb s11, 2(a2)
; RV32I-NEXT: sb s10, 3(a2)
; RV32I-NEXT: srli s0, t6, 16
; RV32I-NEXT: srli s1, t6, 8
; RV32I-NEXT: srli s2, t5, 16
; RV32I-NEXT: srli s3, t5, 8
; RV32I-NEXT: srli s4, t4, 16
; RV32I-NEXT: srli s5, t4, 8
; RV32I-NEXT: srli s6, t3, 16
; RV32I-NEXT: srli s7, t3, 8
; RV32I-NEXT: srli s8, t2, 16
; RV32I-NEXT: srli s9, t2, 8
; RV32I-NEXT: srli s10, t0, 16
; RV32I-NEXT: srli s11, t0, 8
; RV32I-NEXT: sb t6, 24(a2)
; RV32I-NEXT: sb s1, 25(a2)
; RV32I-NEXT: sb s0, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli t1, a6, 16
; RV32I-NEXT: sb t5, 28(a2)
; RV32I-NEXT: sb s3, 29(a2)
; RV32I-NEXT: sb s2, 30(a2)
; RV32I-NEXT: sb a7, 31(a2)
; RV32I-NEXT: srli a7, a6, 8
; RV32I-NEXT: sb t4, 16(a2)
; RV32I-NEXT: sb s5, 17(a2)
; RV32I-NEXT: sb s4, 18(a2)
; RV32I-NEXT: sb a5, 19(a2)
; RV32I-NEXT: sb t3, 20(a2)
; RV32I-NEXT: sb s7, 21(a2)
; RV32I-NEXT: sb s6, 22(a2)
; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: sb t2, 8(a2)
; RV32I-NEXT: sb s9, 9(a2)
; RV32I-NEXT: sb s8, 10(a2)
; RV32I-NEXT: sb a1, 11(a2)
; RV32I-NEXT: sb t0, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb a0, 15(a2)
; RV32I-NEXT: sb a6, 4(a2)
; RV32I-NEXT: sb a7, 5(a2)
; RV32I-NEXT: sb t1, 6(a2)
; RV32I-NEXT: sb a4, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
; IR under test: byte offset * 8 gives the bit shift amount for the wide shl.
  %src = load i256, ptr %src.ptr, align 1
  %byteOff = load i256, ptr %byteOff.ptr, align 1
  %bitOff = shl i256 %byteOff, 3
  %res = shl i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}
; Test: left-shift of an i256 loaded byte-by-byte (align 1), where the shift
; amount is a 4-byte-word offset scaled to bits (shl by 5). Because the shift is
; a whole-word multiple on RV32, the backend lowers it to a plain 32-byte copy
; from a stack slot selected by the offset (no per-word shift/or sequence).
; The CHECK lines below are autogenerated (update_llc_test_checks.py); do not
; edit them by hand.
define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_32bytes_wordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: lbu t3, 8(a0)
; RV64I-NEXT: lbu t4, 9(a0)
; RV64I-NEXT: lbu t5, 10(a0)
; RV64I-NEXT: lbu t6, 11(a0)
; RV64I-NEXT: lbu s0, 12(a0)
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: lbu s8, 20(a0)
; RV64I-NEXT: lbu s9, 21(a0)
; RV64I-NEXT: lbu s10, 22(a0)
; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or t0, t6, t5
; RV64I-NEXT: or t1, s1, s0
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: lbu t6, 24(a0)
; RV64I-NEXT: lbu s0, 25(a0)
; RV64I-NEXT: lbu s1, 26(a0)
; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
; RV64I-NEXT: or t5, s9, s8
; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
; RV64I-NEXT: slli s10, s10, 16
; RV64I-NEXT: slli s11, s11, 24
; RV64I-NEXT: slli s0, s0, 8
; RV64I-NEXT: slli s1, s1, 16
; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or a0, s11, s10
; RV64I-NEXT: or t6, s0, t6
; RV64I-NEXT: or s0, s2, s1
; RV64I-NEXT: or s1, s4, s3
; RV64I-NEXT: lbu s2, 0(a1)
; RV64I-NEXT: lbu s3, 1(a1)
; RV64I-NEXT: lbu s4, 2(a1)
; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s3, s3, 8
; RV64I-NEXT: slli s4, s4, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
; RV64I-NEXT: or s3, s7, s4
; RV64I-NEXT: lbu s4, 5(a1)
; RV64I-NEXT: lbu s6, 4(a1)
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or s4, s4, s6
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: addi s6, sp, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: or a5, t1, t0
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd a3, 32(sp)
; RV64I-NEXT: sd a4, 40(sp)
; RV64I-NEXT: sd a0, 48(sp)
; RV64I-NEXT: sd a5, 56(sp)
; RV64I-NEXT: slli a3, a1, 5
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: andi a0, a3, 32
; RV64I-NEXT: sub a1, s6, a1
; RV64I-NEXT: ld a4, 0(a1)
; RV64I-NEXT: ld a5, 8(a1)
; RV64I-NEXT: ld a6, 16(a1)
; RV64I-NEXT: ld a1, 24(a1)
; RV64I-NEXT: xori a7, a0, 63
; RV64I-NEXT: sll a0, a5, a3
; RV64I-NEXT: srli t0, a4, 1
; RV64I-NEXT: sll a1, a1, a3
; RV64I-NEXT: srli t1, a6, 1
; RV64I-NEXT: sll a6, a6, a3
; RV64I-NEXT: srli a5, a5, 1
; RV64I-NEXT: sll a3, a4, a3
; RV64I-NEXT: srl a4, t0, a7
; RV64I-NEXT: srl t0, t1, a7
; RV64I-NEXT: srl a5, a5, a7
; RV64I-NEXT: srli a7, a6, 56
; RV64I-NEXT: srli t1, a6, 48
; RV64I-NEXT: srli t2, a6, 40
; RV64I-NEXT: srli t3, a6, 32
; RV64I-NEXT: srli t4, a1, 56
; RV64I-NEXT: srli t5, a1, 48
; RV64I-NEXT: srli t6, a1, 40
; RV64I-NEXT: srli s0, a1, 32
; RV64I-NEXT: srli s1, a3, 56
; RV64I-NEXT: srli s2, a3, 48
; RV64I-NEXT: srli s3, a3, 40
; RV64I-NEXT: srli s4, a3, 32
; RV64I-NEXT: srli s5, a3, 24
; RV64I-NEXT: srli s6, a3, 16
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: srli t0, a3, 8
; RV64I-NEXT: or a5, a6, a5
; RV64I-NEXT: srli a6, a0, 56
; RV64I-NEXT: sb t3, 20(a2)
; RV64I-NEXT: sb t2, 21(a2)
; RV64I-NEXT: sb t1, 22(a2)
; RV64I-NEXT: sb a7, 23(a2)
; RV64I-NEXT: srli a7, a0, 48
; RV64I-NEXT: sb s0, 28(a2)
; RV64I-NEXT: sb t6, 29(a2)
; RV64I-NEXT: sb t5, 30(a2)
; RV64I-NEXT: sb t4, 31(a2)
; RV64I-NEXT: srli t1, a0, 40
; RV64I-NEXT: or a4, a0, a4
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sb s4, 4(a2)
; RV64I-NEXT: sb s3, 5(a2)
; RV64I-NEXT: sb s2, 6(a2)
; RV64I-NEXT: sb s1, 7(a2)
; RV64I-NEXT: sb a3, 0(a2)
; RV64I-NEXT: sb t0, 1(a2)
; RV64I-NEXT: sb s6, 2(a2)
; RV64I-NEXT: sb s5, 3(a2)
; RV64I-NEXT: sb a0, 12(a2)
; RV64I-NEXT: sb t1, 13(a2)
; RV64I-NEXT: sb a7, 14(a2)
; RV64I-NEXT: sb a6, 15(a2)
; RV64I-NEXT: srli a0, a5, 24
; RV64I-NEXT: srli a3, a5, 16
; RV64I-NEXT: srli a6, a5, 8
; RV64I-NEXT: srli a7, a1, 24
; RV64I-NEXT: srli t0, a1, 16
; RV64I-NEXT: srli t1, a1, 8
; RV64I-NEXT: srli t2, a4, 24
; RV64I-NEXT: srli t3, a4, 16
; RV64I-NEXT: srli t4, a4, 8
; RV64I-NEXT: sb a5, 16(a2)
; RV64I-NEXT: sb a6, 17(a2)
; RV64I-NEXT: sb a3, 18(a2)
; RV64I-NEXT: sb a0, 19(a2)
; RV64I-NEXT: sb a1, 24(a2)
; RV64I-NEXT: sb t1, 25(a2)
; RV64I-NEXT: sb t0, 26(a2)
; RV64I-NEXT: sb a7, 27(a2)
; RV64I-NEXT: sb a4, 8(a2)
; RV64I-NEXT: sb t4, 9(a2)
; RV64I-NEXT: sb t3, 10(a2)
; RV64I-NEXT: sb t2, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_32bytes_wordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a7, 0(a0)
; RV32I-NEXT: lbu t0, 1(a0)
; RV32I-NEXT: lbu t1, 2(a0)
; RV32I-NEXT: lbu s1, 3(a0)
; RV32I-NEXT: lbu s7, 4(a0)
; RV32I-NEXT: lbu s8, 5(a0)
; RV32I-NEXT: lbu s4, 6(a0)
; RV32I-NEXT: lbu s6, 7(a0)
; RV32I-NEXT: lbu s5, 8(a0)
; RV32I-NEXT: lbu s10, 9(a0)
; RV32I-NEXT: lbu s11, 10(a0)
; RV32I-NEXT: lbu ra, 11(a0)
; RV32I-NEXT: lbu t4, 12(a0)
; RV32I-NEXT: lbu t6, 13(a0)
; RV32I-NEXT: lbu a5, 14(a0)
; RV32I-NEXT: lbu a6, 15(a0)
; RV32I-NEXT: lbu a3, 16(a0)
; RV32I-NEXT: lbu t2, 17(a0)
; RV32I-NEXT: lbu t3, 18(a0)
; RV32I-NEXT: lbu t5, 19(a0)
; RV32I-NEXT: lbu a4, 20(a0)
; RV32I-NEXT: lbu s0, 21(a0)
; RV32I-NEXT: lbu s2, 22(a0)
; RV32I-NEXT: lbu s3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli s1, s1, 24
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s1, t1
; RV32I-NEXT: or t1, s8, s7
; RV32I-NEXT: lbu s1, 24(a0)
; RV32I-NEXT: lbu s7, 25(a0)
; RV32I-NEXT: lbu s8, 26(a0)
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s6, s6, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: slli ra, ra, 24
; RV32I-NEXT: or s4, s6, s4
; RV32I-NEXT: or s5, s10, s5
; RV32I-NEXT: or s6, ra, s11
; RV32I-NEXT: lbu s10, 28(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu ra, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 32(sp)
; RV32I-NEXT: sw zero, 36(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: slli t6, t6, 8
; RV32I-NEXT: or t4, t6, t4
; RV32I-NEXT: addi t6, sp, 40
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t5, t5, 24
; RV32I-NEXT: slli s0, s0, 8
; RV32I-NEXT: slli s2, s2, 16
; RV32I-NEXT: slli s3, s3, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a3, t2, a3
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a4, s0, a4
; RV32I-NEXT: or t2, s3, s2
; RV32I-NEXT: or t3, s7, s1
; RV32I-NEXT: or t5, s9, s8
; RV32I-NEXT: or s0, s11, s10
; RV32I-NEXT: or a0, a0, ra
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s4, t1
; RV32I-NEXT: or t1, s6, s5
; RV32I-NEXT: or a5, a5, t4
; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: or a4, t2, a4
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a0, a0, s0
; RV32I-NEXT: sub t2, t6, a1
; RV32I-NEXT: sw a3, 56(sp)
; RV32I-NEXT: sw a4, 60(sp)
; RV32I-NEXT: sw a6, 64(sp)
; RV32I-NEXT: sw a0, 68(sp)
; RV32I-NEXT: sw a7, 40(sp)
; RV32I-NEXT: sw t0, 44(sp)
; RV32I-NEXT: sw t1, 48(sp)
; RV32I-NEXT: sw a5, 52(sp)
; RV32I-NEXT: lw a1, 0(t2)
; RV32I-NEXT: lw a0, 4(t2)
; RV32I-NEXT: lw a4, 8(t2)
; RV32I-NEXT: lw a3, 12(t2)
; RV32I-NEXT: lw a7, 24(t2)
; RV32I-NEXT: lw a5, 20(t2)
; RV32I-NEXT: lw a6, 16(t2)
; RV32I-NEXT: lw t0, 28(t2)
; RV32I-NEXT: srli t1, a7, 24
; RV32I-NEXT: srli t2, a7, 16
; RV32I-NEXT: srli t3, a7, 8
; RV32I-NEXT: srli t4, t0, 24
; RV32I-NEXT: srli t5, t0, 16
; RV32I-NEXT: srli t6, t0, 8
; RV32I-NEXT: srli s0, a6, 24
; RV32I-NEXT: srli s1, a6, 16
; RV32I-NEXT: srli s2, a6, 8
; RV32I-NEXT: srli s3, a5, 24
; RV32I-NEXT: srli s4, a5, 16
; RV32I-NEXT: srli s5, a5, 8
; RV32I-NEXT: srli s6, a4, 24
; RV32I-NEXT: srli s7, a4, 16
; RV32I-NEXT: srli s8, a4, 8
; RV32I-NEXT: srli s9, a3, 24
; RV32I-NEXT: srli s10, a3, 16
; RV32I-NEXT: srli s11, a3, 8
; RV32I-NEXT: srli ra, a1, 24
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, a1, 16
; RV32I-NEXT: sb t0, 28(a2)
; RV32I-NEXT: sb t6, 29(a2)
; RV32I-NEXT: sb t5, 30(a2)
; RV32I-NEXT: sb t4, 31(a2)
; RV32I-NEXT: srli t0, a1, 8
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb s2, 17(a2)
; RV32I-NEXT: sb s1, 18(a2)
; RV32I-NEXT: sb s0, 19(a2)
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s5, 21(a2)
; RV32I-NEXT: sb s4, 22(a2)
; RV32I-NEXT: sb s3, 23(a2)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s8, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb s9, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t0, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb ra, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a5, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
; IR under test: 32-bit-word offset * 32 gives the bit shift amount for the wide shl.
  %src = load i256, ptr %src.ptr, align 1
  %wordOff = load i256, ptr %wordOff.ptr, align 1
  %bitOff = shl i256 %wordOff, 5
  %res = shl i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}
define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_32bytes_dwordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a5, 0(a0)
; RV64I-NEXT: lbu a7, 1(a0)
; RV64I-NEXT: lbu t2, 2(a0)
; RV64I-NEXT: lbu s3, 3(a0)
; RV64I-NEXT: lbu t0, 4(a0)
; RV64I-NEXT: lbu s8, 5(a0)
; RV64I-NEXT: lbu s9, 6(a0)
; RV64I-NEXT: lbu s10, 7(a0)
; RV64I-NEXT: lbu s2, 8(a0)
; RV64I-NEXT: lbu s4, 9(a0)
; RV64I-NEXT: lbu s5, 10(a0)
; RV64I-NEXT: lbu s6, 11(a0)
; RV64I-NEXT: lbu s7, 12(a0)
; RV64I-NEXT: lbu s11, 13(a0)
; RV64I-NEXT: lbu t1, 14(a0)
; RV64I-NEXT: lbu t3, 15(a0)
; RV64I-NEXT: lbu a3, 16(a0)
; RV64I-NEXT: lbu a6, 17(a0)
; RV64I-NEXT: lbu t4, 18(a0)
; RV64I-NEXT: lbu t5, 19(a0)
; RV64I-NEXT: lbu a4, 20(a0)
; RV64I-NEXT: lbu t6, 21(a0)
; RV64I-NEXT: lbu s0, 22(a0)
; RV64I-NEXT: lbu s1, 23(a0)
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: slli s8, s8, 8
; RV64I-NEXT: slli s9, s9, 16
; RV64I-NEXT: slli s10, s10, 24
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, s3, t2
; RV64I-NEXT: or t0, s8, t0
; RV64I-NEXT: or t2, s10, s9
; RV64I-NEXT: lbu s3, 24(a0)
; RV64I-NEXT: lbu s8, 25(a0)
; RV64I-NEXT: lbu s9, 26(a0)
; RV64I-NEXT: lbu s10, 27(a0)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s11, s11, 8
; RV64I-NEXT: or s2, s4, s2
; RV64I-NEXT: or s4, s6, s5
; RV64I-NEXT: or s5, s11, s7
; RV64I-NEXT: lbu s6, 28(a0)
; RV64I-NEXT: lbu s7, 29(a0)
; RV64I-NEXT: lbu s11, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t3, t1
; RV64I-NEXT: addi t3, sp, 32
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli t4, t4, 16
; RV64I-NEXT: slli t5, t5, 24
; RV64I-NEXT: slli t6, t6, 8
; RV64I-NEXT: slli s0, s0, 16
; RV64I-NEXT: slli s1, s1, 24
; RV64I-NEXT: slli s8, s8, 8
; RV64I-NEXT: slli s9, s9, 16
; RV64I-NEXT: slli s10, s10, 24
; RV64I-NEXT: slli s7, s7, 8
; RV64I-NEXT: slli s11, s11, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: or a6, t5, t4
; RV64I-NEXT: or a4, t6, a4
; RV64I-NEXT: or s0, s1, s0
; RV64I-NEXT: or t4, s8, s3
; RV64I-NEXT: or t5, s10, s9
; RV64I-NEXT: or t6, s7, s6
; RV64I-NEXT: or a0, a0, s11
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, t2, t0
; RV64I-NEXT: or t0, s4, s2
; RV64I-NEXT: or t1, t1, s5
; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: or a4, s0, a4
; RV64I-NEXT: or a6, t5, t4
; RV64I-NEXT: or a0, a0, t6
; RV64I-NEXT: sub t2, t3, a1
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a1, a7, a5
; RV64I-NEXT: or a5, t1, t0
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sd a1, 32(sp)
; RV64I-NEXT: sd a5, 40(sp)
; RV64I-NEXT: sd a3, 48(sp)
; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: ld a4, 16(t2)
; RV64I-NEXT: ld a0, 8(t2)
; RV64I-NEXT: ld a1, 0(t2)
; RV64I-NEXT: ld a3, 24(t2)
; RV64I-NEXT: srli a5, a4, 56
; RV64I-NEXT: srli a6, a4, 48
; RV64I-NEXT: srli a7, a4, 40
; RV64I-NEXT: srli t0, a4, 32
; RV64I-NEXT: srli t1, a4, 24
; RV64I-NEXT: srli t2, a4, 16
; RV64I-NEXT: srli t3, a4, 8
; RV64I-NEXT: srli t4, a3, 56
; RV64I-NEXT: srli t5, a3, 48
; RV64I-NEXT: srli t6, a3, 40
; RV64I-NEXT: srli s0, a3, 32
; RV64I-NEXT: srli s1, a3, 24
; RV64I-NEXT: srli s2, a3, 16
; RV64I-NEXT: srli s3, a3, 8
; RV64I-NEXT: srli s4, a1, 56
; RV64I-NEXT: srli s5, a1, 48
; RV64I-NEXT: srli s6, a1, 40
; RV64I-NEXT: srli s7, a1, 32
; RV64I-NEXT: srli s8, a1, 24
; RV64I-NEXT: srli s9, a1, 16
; RV64I-NEXT: srli s10, a1, 8
; RV64I-NEXT: srli s11, a0, 56
; RV64I-NEXT: sb t0, 20(a2)
; RV64I-NEXT: sb a7, 21(a2)
; RV64I-NEXT: sb a6, 22(a2)
; RV64I-NEXT: sb a5, 23(a2)
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: sb a4, 16(a2)
; RV64I-NEXT: sb t3, 17(a2)
; RV64I-NEXT: sb t2, 18(a2)
; RV64I-NEXT: sb t1, 19(a2)
; RV64I-NEXT: srli a4, a0, 40
; RV64I-NEXT: sb s0, 28(a2)
; RV64I-NEXT: sb t6, 29(a2)
; RV64I-NEXT: sb t5, 30(a2)
; RV64I-NEXT: sb t4, 31(a2)
; RV64I-NEXT: srli a6, a0, 32
; RV64I-NEXT: sb a3, 24(a2)
; RV64I-NEXT: sb s3, 25(a2)
; RV64I-NEXT: sb s2, 26(a2)
; RV64I-NEXT: sb s1, 27(a2)
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: sb s7, 4(a2)
; RV64I-NEXT: sb s6, 5(a2)
; RV64I-NEXT: sb s5, 6(a2)
; RV64I-NEXT: sb s4, 7(a2)
; RV64I-NEXT: srli a7, a0, 16
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s10, 1(a2)
; RV64I-NEXT: sb s9, 2(a2)
; RV64I-NEXT: sb s8, 3(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a4, 13(a2)
; RV64I-NEXT: sb a5, 14(a2)
; RV64I-NEXT: sb s11, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a7, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_32bytes_dwordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a7, 0(a0)
; RV32I-NEXT: lbu t0, 1(a0)
; RV32I-NEXT: lbu t1, 2(a0)
; RV32I-NEXT: lbu s1, 3(a0)
; RV32I-NEXT: lbu s7, 4(a0)
; RV32I-NEXT: lbu s8, 5(a0)
; RV32I-NEXT: lbu s4, 6(a0)
; RV32I-NEXT: lbu s6, 7(a0)
; RV32I-NEXT: lbu s5, 8(a0)
; RV32I-NEXT: lbu s10, 9(a0)
; RV32I-NEXT: lbu s11, 10(a0)
; RV32I-NEXT: lbu ra, 11(a0)
; RV32I-NEXT: lbu t4, 12(a0)
; RV32I-NEXT: lbu t6, 13(a0)
; RV32I-NEXT: lbu a5, 14(a0)
; RV32I-NEXT: lbu a6, 15(a0)
; RV32I-NEXT: lbu a3, 16(a0)
; RV32I-NEXT: lbu t2, 17(a0)
; RV32I-NEXT: lbu t3, 18(a0)
; RV32I-NEXT: lbu t5, 19(a0)
; RV32I-NEXT: lbu a4, 20(a0)
; RV32I-NEXT: lbu s0, 21(a0)
; RV32I-NEXT: lbu s2, 22(a0)
; RV32I-NEXT: lbu s3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli s1, s1, 24
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s1, t1
; RV32I-NEXT: or t1, s8, s7
; RV32I-NEXT: lbu s1, 24(a0)
; RV32I-NEXT: lbu s7, 25(a0)
; RV32I-NEXT: lbu s8, 26(a0)
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s6, s6, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: slli ra, ra, 24
; RV32I-NEXT: or s4, s6, s4
; RV32I-NEXT: or s5, s10, s5
; RV32I-NEXT: or s6, ra, s11
; RV32I-NEXT: lbu s10, 28(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu ra, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 32(sp)
; RV32I-NEXT: sw zero, 36(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: slli t6, t6, 8
; RV32I-NEXT: or t4, t6, t4
; RV32I-NEXT: addi t6, sp, 40
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t5, t5, 24
; RV32I-NEXT: slli s0, s0, 8
; RV32I-NEXT: slli s2, s2, 16
; RV32I-NEXT: slli s3, s3, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a3, t2, a3
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a4, s0, a4
; RV32I-NEXT: or t2, s3, s2
; RV32I-NEXT: or t3, s7, s1
; RV32I-NEXT: or t5, s9, s8
; RV32I-NEXT: or s0, s11, s10
; RV32I-NEXT: or a0, a0, ra
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, s4, t1
; RV32I-NEXT: or t1, s6, s5
; RV32I-NEXT: or a5, a5, t4
; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: or a4, t2, a4
; RV32I-NEXT: or a6, t5, t3
; RV32I-NEXT: or a0, a0, s0
; RV32I-NEXT: sub t2, t6, a1
; RV32I-NEXT: sw a3, 56(sp)
; RV32I-NEXT: sw a4, 60(sp)
; RV32I-NEXT: sw a6, 64(sp)
; RV32I-NEXT: sw a0, 68(sp)
; RV32I-NEXT: sw a7, 40(sp)
; RV32I-NEXT: sw t0, 44(sp)
; RV32I-NEXT: sw t1, 48(sp)
; RV32I-NEXT: sw a5, 52(sp)
; RV32I-NEXT: lw a1, 0(t2)
; RV32I-NEXT: lw a0, 4(t2)
; RV32I-NEXT: lw a4, 8(t2)
; RV32I-NEXT: lw a3, 12(t2)
; RV32I-NEXT: lw a7, 24(t2)
; RV32I-NEXT: lw a5, 20(t2)
; RV32I-NEXT: lw a6, 16(t2)
; RV32I-NEXT: lw t0, 28(t2)
; RV32I-NEXT: srli t1, a7, 24
; RV32I-NEXT: srli t2, a7, 16
; RV32I-NEXT: srli t3, a7, 8
; RV32I-NEXT: srli t4, t0, 24
; RV32I-NEXT: srli t5, t0, 16
; RV32I-NEXT: srli t6, t0, 8
; RV32I-NEXT: srli s0, a6, 24
; RV32I-NEXT: srli s1, a6, 16
; RV32I-NEXT: srli s2, a6, 8
; RV32I-NEXT: srli s3, a5, 24
; RV32I-NEXT: srli s4, a5, 16
; RV32I-NEXT: srli s5, a5, 8
; RV32I-NEXT: srli s6, a4, 24
; RV32I-NEXT: srli s7, a4, 16
; RV32I-NEXT: srli s8, a4, 8
; RV32I-NEXT: srli s9, a3, 24
; RV32I-NEXT: srli s10, a3, 16
; RV32I-NEXT: srli s11, a3, 8
; RV32I-NEXT: srli ra, a1, 24
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, a1, 16
; RV32I-NEXT: sb t0, 28(a2)
; RV32I-NEXT: sb t6, 29(a2)
; RV32I-NEXT: sb t5, 30(a2)
; RV32I-NEXT: sb t4, 31(a2)
; RV32I-NEXT: srli t0, a1, 8
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb s2, 17(a2)
; RV32I-NEXT: sb s1, 18(a2)
; RV32I-NEXT: sb s0, 19(a2)
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s5, 21(a2)
; RV32I-NEXT: sb s4, 22(a2)
; RV32I-NEXT: sb s3, 23(a2)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s8, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb s9, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t0, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb ra, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a5, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
%bitOff = shl i256 %dwordOff, 6
%res = shl i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}
; Arithmetic right shift of a 32-byte (i256) value loaded byte-wise (align 1),
; with the shift amount given as a byte offset: %bitOff = %byteOff << 3.
; Lowering stores the value to the stack with a sign-extension fill above it
; (sraiw / srai of the top byte's word replicated into the high half), indexes
; into the buffer by the byte offset, then funnel-shifts neighbouring words
; (srl + sll of word<<1) to handle the sub-word bit amount.
; NOTE(review): the check lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them with that script instead
; of editing by hand.
define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_32bytes:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: lbu t3, 8(a0)
; RV64I-NEXT: lbu t4, 9(a0)
; RV64I-NEXT: lbu t5, 10(a0)
; RV64I-NEXT: lbu t6, 11(a0)
; RV64I-NEXT: lbu s0, 12(a0)
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: lbu s8, 20(a0)
; RV64I-NEXT: lbu s9, 21(a0)
; RV64I-NEXT: lbu s10, 22(a0)
; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or t0, t6, t5
; RV64I-NEXT: or t1, s1, s0
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: lbu t6, 24(a0)
; RV64I-NEXT: lbu s0, 25(a0)
; RV64I-NEXT: lbu s1, 26(a0)
; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
; RV64I-NEXT: or t5, s9, s8
; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
; RV64I-NEXT: slli s10, s10, 16
; RV64I-NEXT: slli s11, s11, 24
; RV64I-NEXT: slli s0, s0, 8
; RV64I-NEXT: slli s1, s1, 16
; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or a0, s11, s10
; RV64I-NEXT: or t6, s0, t6
; RV64I-NEXT: or s0, s2, s1
; RV64I-NEXT: or s1, s4, s3
; RV64I-NEXT: lbu s2, 0(a1)
; RV64I-NEXT: lbu s3, 1(a1)
; RV64I-NEXT: lbu s4, 2(a1)
; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s3, s3, 8
; RV64I-NEXT: slli s4, s4, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
; RV64I-NEXT: or s3, s7, s4
; RV64I-NEXT: lbu s4, 5(a1)
; RV64I-NEXT: lbu s6, 4(a1)
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or s4, s4, s6
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: mv s6, sp
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli t3, t1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: sraiw t1, t1, 31
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: or a5, t3, t0
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd t1, 32(sp)
; RV64I-NEXT: sd t1, 40(sp)
; RV64I-NEXT: sd t1, 48(sp)
; RV64I-NEXT: sd t1, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
; RV64I-NEXT: sd a0, 16(sp)
; RV64I-NEXT: sd a5, 24(sp)
; RV64I-NEXT: slli a4, a1, 3
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: add a1, s6, a1
; RV64I-NEXT: andi a0, a4, 56
; RV64I-NEXT: xori a5, a0, 63
; RV64I-NEXT: ld a3, 8(a1)
; RV64I-NEXT: ld a6, 16(a1)
; RV64I-NEXT: ld a7, 0(a1)
; RV64I-NEXT: ld t0, 24(a1)
; RV64I-NEXT: srl a0, a3, a4
; RV64I-NEXT: slli t1, a6, 1
; RV64I-NEXT: srl a1, a7, a4
; RV64I-NEXT: slli a7, a3, 1
; RV64I-NEXT: srl a3, a6, a4
; RV64I-NEXT: slli a6, t0, 1
; RV64I-NEXT: sra t0, t0, a4
; RV64I-NEXT: sll a4, t1, a5
; RV64I-NEXT: sll a7, a7, a5
; RV64I-NEXT: sll a5, a6, a5
; RV64I-NEXT: srli a6, t0, 56
; RV64I-NEXT: srli t1, t0, 48
; RV64I-NEXT: srli t2, t0, 40
; RV64I-NEXT: srli t3, t0, 32
; RV64I-NEXT: srli t4, t0, 24
; RV64I-NEXT: srli t5, t0, 16
; RV64I-NEXT: srli t6, t0, 8
; RV64I-NEXT: or a4, a0, a4
; RV64I-NEXT: or a7, a1, a7
; RV64I-NEXT: or a5, a3, a5
; RV64I-NEXT: sb t3, 28(a2)
; RV64I-NEXT: sb t2, 29(a2)
; RV64I-NEXT: sb t1, 30(a2)
; RV64I-NEXT: sb a6, 31(a2)
; RV64I-NEXT: sb t0, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
; RV64I-NEXT: srli a6, a5, 56
; RV64I-NEXT: srli t0, a5, 48
; RV64I-NEXT: srli t1, a5, 40
; RV64I-NEXT: srli t2, a5, 32
; RV64I-NEXT: srli t3, a5, 24
; RV64I-NEXT: srli t4, a5, 16
; RV64I-NEXT: srli a5, a5, 8
; RV64I-NEXT: srli t5, a7, 56
; RV64I-NEXT: srli t6, a7, 48
; RV64I-NEXT: srli s0, a7, 40
; RV64I-NEXT: srli s1, a7, 32
; RV64I-NEXT: srli s2, a7, 24
; RV64I-NEXT: srli s3, a7, 16
; RV64I-NEXT: srli a7, a7, 8
; RV64I-NEXT: srli s4, a4, 56
; RV64I-NEXT: srli s5, a4, 48
; RV64I-NEXT: srli s6, a4, 40
; RV64I-NEXT: sb t2, 20(a2)
; RV64I-NEXT: sb t1, 21(a2)
; RV64I-NEXT: sb t0, 22(a2)
; RV64I-NEXT: sb a6, 23(a2)
; RV64I-NEXT: srli a6, a4, 32
; RV64I-NEXT: sb a3, 16(a2)
; RV64I-NEXT: sb a5, 17(a2)
; RV64I-NEXT: sb t4, 18(a2)
; RV64I-NEXT: sb t3, 19(a2)
; RV64I-NEXT: srli a3, a4, 24
; RV64I-NEXT: sb s1, 4(a2)
; RV64I-NEXT: sb s0, 5(a2)
; RV64I-NEXT: sb t6, 6(a2)
; RV64I-NEXT: sb t5, 7(a2)
; RV64I-NEXT: srli a5, a4, 16
; RV64I-NEXT: srli a4, a4, 8
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb a7, 1(a2)
; RV64I-NEXT: sb s3, 2(a2)
; RV64I-NEXT: sb s2, 3(a2)
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb s6, 13(a2)
; RV64I-NEXT: sb s5, 14(a2)
; RV64I-NEXT: sb s4, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a4, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_32bytes:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu t6, 0(a0)
; RV32I-NEXT: lbu a4, 1(a0)
; RV32I-NEXT: lbu a5, 2(a0)
; RV32I-NEXT: lbu a6, 3(a0)
; RV32I-NEXT: lbu t1, 4(a0)
; RV32I-NEXT: lbu t3, 5(a0)
; RV32I-NEXT: lbu t4, 6(a0)
; RV32I-NEXT: lbu t5, 7(a0)
; RV32I-NEXT: lbu t2, 8(a0)
; RV32I-NEXT: lbu s1, 9(a0)
; RV32I-NEXT: lbu s7, 10(a0)
; RV32I-NEXT: lbu s8, 11(a0)
; RV32I-NEXT: lbu s9, 12(a0)
; RV32I-NEXT: lbu s10, 13(a0)
; RV32I-NEXT: lbu s4, 14(a0)
; RV32I-NEXT: lbu s6, 15(a0)
; RV32I-NEXT: lbu s5, 16(a0)
; RV32I-NEXT: lbu s11, 17(a0)
; RV32I-NEXT: lbu ra, 18(a0)
; RV32I-NEXT: lbu a3, 19(a0)
; RV32I-NEXT: lbu s2, 20(a0)
; RV32I-NEXT: lbu s3, 21(a0)
; RV32I-NEXT: lbu a7, 22(a0)
; RV32I-NEXT: lbu t0, 23(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t3, t3, 8
; RV32I-NEXT: slli t4, t4, 16
; RV32I-NEXT: slli t5, t5, 24
; RV32I-NEXT: or a4, a4, t6
; RV32I-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a6, a5
; RV32I-NEXT: or a5, t3, t1
; RV32I-NEXT: or a6, t5, t4
; RV32I-NEXT: lbu t1, 24(a0)
; RV32I-NEXT: lbu t5, 25(a0)
; RV32I-NEXT: lbu t6, 26(a0)
; RV32I-NEXT: lbu s0, 27(a0)
; RV32I-NEXT: slli s1, s1, 8
; RV32I-NEXT: slli s7, s7, 16
; RV32I-NEXT: slli s8, s8, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: or t2, s1, t2
; RV32I-NEXT: or t3, s8, s7
; RV32I-NEXT: or t4, s10, s9
; RV32I-NEXT: lbu s1, 28(a0)
; RV32I-NEXT: lbu s7, 29(a0)
; RV32I-NEXT: lbu s8, 30(a0)
; RV32I-NEXT: lbu s9, 31(a0)
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s6, s6, 24
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: slli ra, ra, 16
; RV32I-NEXT: slli a3, a3, 24
; RV32I-NEXT: or a0, s6, s4
; RV32I-NEXT: or s4, s11, s5
; RV32I-NEXT: or s5, a3, ra
; RV32I-NEXT: lbu a3, 0(a1)
; RV32I-NEXT: lbu s6, 1(a1)
; RV32I-NEXT: lbu s10, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: slli s3, s3, 8
; RV32I-NEXT: or s2, s3, s2
; RV32I-NEXT: addi s3, sp, 8
; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t0, t0, 24
; RV32I-NEXT: slli t5, t5, 8
; RV32I-NEXT: slli t6, t6, 16
; RV32I-NEXT: slli s0, s0, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli s9, s9, 24
; RV32I-NEXT: slli s6, s6, 8
; RV32I-NEXT: slli s10, s10, 16
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a7, t0, a7
; RV32I-NEXT: or t0, t5, t1
; RV32I-NEXT: or t1, s0, t6
; RV32I-NEXT: or t5, s7, s1
; RV32I-NEXT: or t6, s9, s8
; RV32I-NEXT: or a3, s6, a3
; RV32I-NEXT: or a1, a1, s10
; RV32I-NEXT: srai s0, s9, 31
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a4, a4, s1
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: or a6, t3, t2
; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or t2, s5, s4
; RV32I-NEXT: or a7, a7, s2
; RV32I-NEXT: or t0, t1, t0
; RV32I-NEXT: or t1, t6, t5
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: sw s0, 56(sp)
; RV32I-NEXT: sw s0, 60(sp)
; RV32I-NEXT: sw s0, 64(sp)
; RV32I-NEXT: sw s0, 68(sp)
; RV32I-NEXT: sw s0, 40(sp)
; RV32I-NEXT: sw s0, 44(sp)
; RV32I-NEXT: sw s0, 48(sp)
; RV32I-NEXT: sw s0, 52(sp)
; RV32I-NEXT: sw t2, 24(sp)
; RV32I-NEXT: sw a7, 28(sp)
; RV32I-NEXT: sw t0, 32(sp)
; RV32I-NEXT: sw t1, 36(sp)
; RV32I-NEXT: sw a4, 8(sp)
; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: sw a6, 16(sp)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: slli t1, a1, 3
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: add a1, s3, a1
; RV32I-NEXT: andi a0, t1, 24
; RV32I-NEXT: xori t0, a0, 31
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a6, 12(a1)
; RV32I-NEXT: lw a7, 16(a1)
; RV32I-NEXT: lw t2, 20(a1)
; RV32I-NEXT: lw t3, 24(a1)
; RV32I-NEXT: lw t4, 28(a1)
; RV32I-NEXT: srl a0, a4, t1
; RV32I-NEXT: slli t5, a5, 1
; RV32I-NEXT: srl a1, a3, t1
; RV32I-NEXT: slli t6, a4, 1
; RV32I-NEXT: srl a3, a6, t1
; RV32I-NEXT: slli s0, a7, 1
; RV32I-NEXT: srl a4, a5, t1
; RV32I-NEXT: slli s1, a6, 1
; RV32I-NEXT: srl a5, t2, t1
; RV32I-NEXT: slli s2, t3, 1
; RV32I-NEXT: srl a6, a7, t1
; RV32I-NEXT: slli t2, t2, 1
; RV32I-NEXT: srl a7, t3, t1
; RV32I-NEXT: slli t3, t4, 1
; RV32I-NEXT: sra t1, t4, t1
; RV32I-NEXT: sll t4, t5, t0
; RV32I-NEXT: sll t5, t6, t0
; RV32I-NEXT: sll t6, s0, t0
; RV32I-NEXT: sll s0, s1, t0
; RV32I-NEXT: sll s1, s2, t0
; RV32I-NEXT: sll t2, t2, t0
; RV32I-NEXT: sll t3, t3, t0
; RV32I-NEXT: srli s2, t1, 24
; RV32I-NEXT: srli s3, t1, 16
; RV32I-NEXT: srli s4, t1, 8
; RV32I-NEXT: or t0, a0, t4
; RV32I-NEXT: or t4, a1, t5
; RV32I-NEXT: or t5, a3, t6
; RV32I-NEXT: or s0, a4, s0
; RV32I-NEXT: or s1, a5, s1
; RV32I-NEXT: or t2, a6, t2
; RV32I-NEXT: or t3, a7, t3
; RV32I-NEXT: sb t1, 28(a2)
; RV32I-NEXT: sb s4, 29(a2)
; RV32I-NEXT: sb s3, 30(a2)
; RV32I-NEXT: sb s2, 31(a2)
; RV32I-NEXT: srli t1, t3, 24
; RV32I-NEXT: srli t6, t3, 16
; RV32I-NEXT: srli t3, t3, 8
; RV32I-NEXT: srli s2, t2, 24
; RV32I-NEXT: srli s3, t2, 16
; RV32I-NEXT: srli t2, t2, 8
; RV32I-NEXT: srli s4, s1, 24
; RV32I-NEXT: srli s5, s1, 16
; RV32I-NEXT: srli s1, s1, 8
; RV32I-NEXT: srli s6, s0, 24
; RV32I-NEXT: srli s7, s0, 16
; RV32I-NEXT: srli s0, s0, 8
; RV32I-NEXT: srli s8, t5, 24
; RV32I-NEXT: srli s9, t5, 16
; RV32I-NEXT: srli t5, t5, 8
; RV32I-NEXT: srli s10, t4, 24
; RV32I-NEXT: srli s11, t4, 16
; RV32I-NEXT: srli t4, t4, 8
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t6, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, t0, 24
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb t2, 17(a2)
; RV32I-NEXT: sb s3, 18(a2)
; RV32I-NEXT: sb s2, 19(a2)
; RV32I-NEXT: srli a6, t0, 16
; RV32I-NEXT: srli t0, t0, 8
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s1, 21(a2)
; RV32I-NEXT: sb s5, 22(a2)
; RV32I-NEXT: sb s4, 23(a2)
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s0, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb t5, 13(a2)
; RV32I-NEXT: sb s9, 14(a2)
; RV32I-NEXT: sb s8, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t4, 1(a2)
; RV32I-NEXT: sb s11, 2(a2)
; RV32I-NEXT: sb s10, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb t0, 5(a2)
; RV32I-NEXT: sb a6, 6(a2)
; RV32I-NEXT: sb a7, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
; IR under test: unaligned i256 load, shift amount scaled by 8 (byte off),
; arithmetic shift right, unaligned i256 store of the result.
%src = load i256, ptr %src.ptr, align 1
%byteOff = load i256, ptr %byteOff.ptr, align 1
%bitOff = shl i256 %byteOff, 3
%res = ashr i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}
define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_32bytes_wordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
; RV64I-NEXT: lbu a6, 3(a0)
; RV64I-NEXT: lbu a7, 4(a0)
; RV64I-NEXT: lbu t0, 5(a0)
; RV64I-NEXT: lbu t1, 6(a0)
; RV64I-NEXT: lbu t2, 7(a0)
; RV64I-NEXT: lbu t3, 8(a0)
; RV64I-NEXT: lbu t4, 9(a0)
; RV64I-NEXT: lbu t5, 10(a0)
; RV64I-NEXT: lbu t6, 11(a0)
; RV64I-NEXT: lbu s0, 12(a0)
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
; RV64I-NEXT: slli a4, a4, 8
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: lbu s8, 20(a0)
; RV64I-NEXT: lbu s9, 21(a0)
; RV64I-NEXT: lbu s10, 22(a0)
; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or t0, t6, t5
; RV64I-NEXT: or t1, s1, s0
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: lbu t6, 24(a0)
; RV64I-NEXT: lbu s0, 25(a0)
; RV64I-NEXT: lbu s1, 26(a0)
; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
; RV64I-NEXT: or t5, s9, s8
; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
; RV64I-NEXT: slli s10, s10, 16
; RV64I-NEXT: slli s11, s11, 24
; RV64I-NEXT: slli s0, s0, 8
; RV64I-NEXT: slli s1, s1, 16
; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or a0, s11, s10
; RV64I-NEXT: or t6, s0, t6
; RV64I-NEXT: or s0, s2, s1
; RV64I-NEXT: or s1, s4, s3
; RV64I-NEXT: lbu s2, 0(a1)
; RV64I-NEXT: lbu s3, 1(a1)
; RV64I-NEXT: lbu s4, 2(a1)
; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s3, s3, 8
; RV64I-NEXT: slli s4, s4, 16
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
; RV64I-NEXT: or s3, s7, s4
; RV64I-NEXT: lbu s4, 5(a1)
; RV64I-NEXT: lbu s6, 4(a1)
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: or s4, s4, s6
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: mv s6, sp
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
; RV64I-NEXT: or t2, s3, s2
; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli t3, t1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: sraiw t1, t1, 31
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: or a5, t3, t0
; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd t1, 32(sp)
; RV64I-NEXT: sd t1, 40(sp)
; RV64I-NEXT: sd t1, 48(sp)
; RV64I-NEXT: sd t1, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
; RV64I-NEXT: sd a0, 16(sp)
; RV64I-NEXT: sd a5, 24(sp)
; RV64I-NEXT: slli a3, a1, 5
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: andi a0, a3, 32
; RV64I-NEXT: add a1, s6, a1
; RV64I-NEXT: xori a4, a0, 63
; RV64I-NEXT: ld a5, 8(a1)
; RV64I-NEXT: ld a6, 16(a1)
; RV64I-NEXT: ld a7, 0(a1)
; RV64I-NEXT: ld t0, 24(a1)
; RV64I-NEXT: srl a0, a5, a3
; RV64I-NEXT: slli t1, a6, 1
; RV64I-NEXT: srl a1, a7, a3
; RV64I-NEXT: slli a5, a5, 1
; RV64I-NEXT: srl a6, a6, a3
; RV64I-NEXT: slli a7, t0, 1
; RV64I-NEXT: sra a3, t0, a3
; RV64I-NEXT: sll t0, t1, a4
; RV64I-NEXT: sll a5, a5, a4
; RV64I-NEXT: sll a4, a7, a4
; RV64I-NEXT: srli a7, a6, 24
; RV64I-NEXT: srli t1, a6, 16
; RV64I-NEXT: srli t2, a6, 8
; RV64I-NEXT: srli t3, a3, 56
; RV64I-NEXT: srli t4, a3, 48
; RV64I-NEXT: srli t5, a3, 40
; RV64I-NEXT: srli t6, a3, 32
; RV64I-NEXT: srli s0, a3, 24
; RV64I-NEXT: srli s1, a3, 16
; RV64I-NEXT: srli s2, a3, 8
; RV64I-NEXT: srli s3, a1, 24
; RV64I-NEXT: srli s4, a1, 16
; RV64I-NEXT: srli s5, a1, 8
; RV64I-NEXT: srli s6, a0, 24
; RV64I-NEXT: or a4, a6, a4
; RV64I-NEXT: sb a6, 16(a2)
; RV64I-NEXT: sb t2, 17(a2)
; RV64I-NEXT: sb t1, 18(a2)
; RV64I-NEXT: sb a7, 19(a2)
; RV64I-NEXT: srli a6, a0, 16
; RV64I-NEXT: sb t6, 28(a2)
; RV64I-NEXT: sb t5, 29(a2)
; RV64I-NEXT: sb t4, 30(a2)
; RV64I-NEXT: sb t3, 31(a2)
; RV64I-NEXT: srli a7, a0, 8
; RV64I-NEXT: or t0, a0, t0
; RV64I-NEXT: or a5, a1, a5
; RV64I-NEXT: sb a3, 24(a2)
; RV64I-NEXT: sb s2, 25(a2)
; RV64I-NEXT: sb s1, 26(a2)
; RV64I-NEXT: sb s0, 27(a2)
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s5, 1(a2)
; RV64I-NEXT: sb s4, 2(a2)
; RV64I-NEXT: sb s3, 3(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a7, 9(a2)
; RV64I-NEXT: sb a6, 10(a2)
; RV64I-NEXT: sb s6, 11(a2)
; RV64I-NEXT: srli a0, a4, 56
; RV64I-NEXT: srli a1, a4, 48
; RV64I-NEXT: srli a3, a4, 40
; RV64I-NEXT: srli a4, a4, 32
; RV64I-NEXT: srli a6, a5, 56
; RV64I-NEXT: srli a7, a5, 48
; RV64I-NEXT: srli t1, a5, 40
; RV64I-NEXT: srli a5, a5, 32
; RV64I-NEXT: srli t2, t0, 56
; RV64I-NEXT: srli t3, t0, 48
; RV64I-NEXT: srli t4, t0, 40
; RV64I-NEXT: srli t0, t0, 32
; RV64I-NEXT: sb a4, 20(a2)
; RV64I-NEXT: sb a3, 21(a2)
; RV64I-NEXT: sb a1, 22(a2)
; RV64I-NEXT: sb a0, 23(a2)
; RV64I-NEXT: sb a5, 4(a2)
; RV64I-NEXT: sb t1, 5(a2)
; RV64I-NEXT: sb a7, 6(a2)
; RV64I-NEXT: sb a6, 7(a2)
; RV64I-NEXT: sb t0, 12(a2)
; RV64I-NEXT: sb t4, 13(a2)
; RV64I-NEXT: sb t3, 14(a2)
; RV64I-NEXT: sb t2, 15(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_32bytes_wordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a6, 0(a0)
; RV32I-NEXT: lbu t0, 1(a0)
; RV32I-NEXT: lbu t1, 2(a0)
; RV32I-NEXT: lbu t6, 3(a0)
; RV32I-NEXT: lbu s7, 4(a0)
; RV32I-NEXT: lbu s8, 5(a0)
; RV32I-NEXT: lbu s3, 6(a0)
; RV32I-NEXT: lbu s5, 7(a0)
; RV32I-NEXT: lbu s4, 8(a0)
; RV32I-NEXT: lbu s9, 9(a0)
; RV32I-NEXT: lbu s10, 10(a0)
; RV32I-NEXT: lbu s11, 11(a0)
; RV32I-NEXT: lbu s2, 12(a0)
; RV32I-NEXT: lbu s6, 13(a0)
; RV32I-NEXT: lbu a5, 14(a0)
; RV32I-NEXT: lbu a7, 15(a0)
; RV32I-NEXT: lbu a3, 16(a0)
; RV32I-NEXT: lbu t2, 17(a0)
; RV32I-NEXT: lbu t3, 18(a0)
; RV32I-NEXT: lbu t4, 19(a0)
; RV32I-NEXT: lbu a4, 20(a0)
; RV32I-NEXT: lbu t5, 21(a0)
; RV32I-NEXT: lbu s0, 22(a0)
; RV32I-NEXT: lbu s1, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: or a6, t0, a6
; RV32I-NEXT: or t0, t6, t1
; RV32I-NEXT: or t1, s8, s7
; RV32I-NEXT: lbu t6, 24(a0)
; RV32I-NEXT: lbu s7, 25(a0)
; RV32I-NEXT: lbu s8, 26(a0)
; RV32I-NEXT: lbu ra, 27(a0)
; RV32I-NEXT: slli s3, s3, 16
; RV32I-NEXT: slli s5, s5, 24
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli s10, s10, 16
; RV32I-NEXT: slli s11, s11, 24
; RV32I-NEXT: or s3, s5, s3
; RV32I-NEXT: or s4, s9, s4
; RV32I-NEXT: or s5, s11, s10
; RV32I-NEXT: lbu s9, 28(a0)
; RV32I-NEXT: lbu s10, 29(a0)
; RV32I-NEXT: lbu s11, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: slli s6, s6, 8
; RV32I-NEXT: or s2, s6, s2
; RV32I-NEXT: addi s6, sp, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t4, t4, 24
; RV32I-NEXT: slli t5, t5, 8
; RV32I-NEXT: slli s0, s0, 16
; RV32I-NEXT: slli s1, s1, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli ra, ra, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: or a5, a7, a5
; RV32I-NEXT: or a3, t2, a3
; RV32I-NEXT: or a7, t4, t3
; RV32I-NEXT: or a4, t5, a4
; RV32I-NEXT: or s0, s1, s0
; RV32I-NEXT: or t2, s7, t6
; RV32I-NEXT: or t3, ra, s8
; RV32I-NEXT: or t4, s10, s9
; RV32I-NEXT: or t5, a0, s11
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: or a6, t0, a6
; RV32I-NEXT: or t0, s3, t1
; RV32I-NEXT: or t1, s5, s4
; RV32I-NEXT: or a5, a5, s2
; RV32I-NEXT: or a3, a7, a3
; RV32I-NEXT: or a4, s0, a4
; RV32I-NEXT: or a7, t3, t2
; RV32I-NEXT: or t2, t5, t4
; RV32I-NEXT: sw a0, 56(sp)
; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 64(sp)
; RV32I-NEXT: sw a0, 68(sp)
; RV32I-NEXT: sw a0, 40(sp)
; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: sw a0, 48(sp)
; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: add s6, s6, a1
; RV32I-NEXT: sw a3, 24(sp)
; RV32I-NEXT: sw a4, 28(sp)
; RV32I-NEXT: sw a7, 32(sp)
; RV32I-NEXT: sw t2, 36(sp)
; RV32I-NEXT: sw a6, 8(sp)
; RV32I-NEXT: sw t0, 12(sp)
; RV32I-NEXT: sw t1, 16(sp)
; RV32I-NEXT: sw a5, 20(sp)
; RV32I-NEXT: lw a1, 0(s6)
; RV32I-NEXT: lw a0, 4(s6)
; RV32I-NEXT: lw a4, 8(s6)
; RV32I-NEXT: lw a3, 12(s6)
; RV32I-NEXT: lw a7, 24(s6)
; RV32I-NEXT: lw a5, 20(s6)
; RV32I-NEXT: lw a6, 16(s6)
; RV32I-NEXT: lw t0, 28(s6)
; RV32I-NEXT: srli t1, a7, 24
; RV32I-NEXT: srli t2, a7, 16
; RV32I-NEXT: srli t3, a7, 8
; RV32I-NEXT: srli t4, t0, 24
; RV32I-NEXT: srli t5, t0, 16
; RV32I-NEXT: srli t6, t0, 8
; RV32I-NEXT: srli s0, a6, 24
; RV32I-NEXT: srli s1, a6, 16
; RV32I-NEXT: srli s2, a6, 8
; RV32I-NEXT: srli s3, a5, 24
; RV32I-NEXT: srli s4, a5, 16
; RV32I-NEXT: srli s5, a5, 8
; RV32I-NEXT: srli s6, a4, 24
; RV32I-NEXT: srli s7, a4, 16
; RV32I-NEXT: srli s8, a4, 8
; RV32I-NEXT: srli s9, a3, 24
; RV32I-NEXT: srli s10, a3, 16
; RV32I-NEXT: srli s11, a3, 8
; RV32I-NEXT: srli ra, a1, 24
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, a1, 16
; RV32I-NEXT: sb t0, 28(a2)
; RV32I-NEXT: sb t6, 29(a2)
; RV32I-NEXT: sb t5, 30(a2)
; RV32I-NEXT: sb t4, 31(a2)
; RV32I-NEXT: srli t0, a1, 8
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb s2, 17(a2)
; RV32I-NEXT: sb s1, 18(a2)
; RV32I-NEXT: sb s0, 19(a2)
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s5, 21(a2)
; RV32I-NEXT: sb s4, 22(a2)
; RV32I-NEXT: sb s3, 23(a2)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s8, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb s9, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t0, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb ra, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a5, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
%wordOff = load i256, ptr %wordOff.ptr, align 1
%bitOff = shl i256 %wordOff, 5
%res = ashr i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}
; Test: arithmetic right shift of a 32-byte (i256) value where the shift
; amount is given in 64-bit doublewords (bitOff = dwordOff << 6). All loads
; and the store are align 1, so both lowerings assemble/disassemble the value
; byte by byte through a stack temporary.
; NOTE: every "; RV64I-NEXT:"/"; RV32I-NEXT:" line below is an autogenerated
; FileCheck expectation (utils/update_llc_test_checks.py) that must match llc
; output exactly — regenerate with the script rather than editing by hand.
define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_32bytes_dwordOff:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -160
; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a5, 0(a0)
; RV64I-NEXT: lbu a7, 1(a0)
; RV64I-NEXT: lbu t1, 2(a0)
; RV64I-NEXT: lbu s3, 3(a0)
; RV64I-NEXT: lbu t0, 4(a0)
; RV64I-NEXT: lbu s8, 5(a0)
; RV64I-NEXT: lbu s9, 6(a0)
; RV64I-NEXT: lbu s10, 7(a0)
; RV64I-NEXT: lbu s2, 8(a0)
; RV64I-NEXT: lbu s4, 9(a0)
; RV64I-NEXT: lbu s5, 10(a0)
; RV64I-NEXT: lbu s6, 11(a0)
; RV64I-NEXT: lbu s7, 12(a0)
; RV64I-NEXT: lbu s11, 13(a0)
; RV64I-NEXT: lbu t4, 14(a0)
; RV64I-NEXT: lbu t5, 15(a0)
; RV64I-NEXT: lbu a3, 16(a0)
; RV64I-NEXT: lbu a6, 17(a0)
; RV64I-NEXT: lbu t2, 18(a0)
; RV64I-NEXT: lbu t3, 19(a0)
; RV64I-NEXT: lbu a4, 20(a0)
; RV64I-NEXT: lbu t6, 21(a0)
; RV64I-NEXT: lbu s0, 22(a0)
; RV64I-NEXT: lbu s1, 23(a0)
; RV64I-NEXT: slli a7, a7, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli s3, s3, 24
; RV64I-NEXT: slli s8, s8, 8
; RV64I-NEXT: slli s9, s9, 16
; RV64I-NEXT: slli s10, s10, 24
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, s3, t1
; RV64I-NEXT: or t0, s8, t0
; RV64I-NEXT: or t1, s10, s9
; RV64I-NEXT: lbu s3, 24(a0)
; RV64I-NEXT: lbu s8, 25(a0)
; RV64I-NEXT: lbu s9, 26(a0)
; RV64I-NEXT: lbu s10, 27(a0)
; RV64I-NEXT: slli s4, s4, 8
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
; RV64I-NEXT: slli s11, s11, 8
; RV64I-NEXT: or s2, s4, s2
; RV64I-NEXT: or s4, s6, s5
; RV64I-NEXT: or s5, s11, s7
; RV64I-NEXT: lbu s6, 28(a0)
; RV64I-NEXT: lbu s7, 29(a0)
; RV64I-NEXT: lbu s11, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: slli t4, t4, 16
; RV64I-NEXT: slli t5, t5, 24
; RV64I-NEXT: or t4, t5, t4
; RV64I-NEXT: mv t5, sp
; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli t2, t2, 16
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: slli t6, t6, 8
; RV64I-NEXT: slli s0, s0, 16
; RV64I-NEXT: slli s1, s1, 24
; RV64I-NEXT: slli s8, s8, 8
; RV64I-NEXT: slli s9, s9, 16
; RV64I-NEXT: slli s10, s10, 24
; RV64I-NEXT: slli s7, s7, 8
; RV64I-NEXT: slli s11, s11, 16
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: or a6, t3, t2
; RV64I-NEXT: or a4, t6, a4
; RV64I-NEXT: or s0, s1, s0
; RV64I-NEXT: or t2, s8, s3
; RV64I-NEXT: or t3, s10, s9
; RV64I-NEXT: or t6, s7, s6
; RV64I-NEXT: or a0, a0, s11
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, t1, t0
; RV64I-NEXT: or t0, s4, s2
; RV64I-NEXT: or t1, t4, s5
; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: or a4, s0, a4
; RV64I-NEXT: or a6, t3, t2
; RV64I-NEXT: or a0, a0, t6
; RV64I-NEXT: add t5, t5, a1
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli t1, t1, 32
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a1, a0, 32
; RV64I-NEXT: sraiw a0, a0, 31
; RV64I-NEXT: or a5, a7, a5
; RV64I-NEXT: or a7, t1, t0
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd a0, 32(sp)
; RV64I-NEXT: sd a0, 40(sp)
; RV64I-NEXT: sd a0, 48(sp)
; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: sd a5, 0(sp)
; RV64I-NEXT: sd a7, 8(sp)
; RV64I-NEXT: sd a3, 16(sp)
; RV64I-NEXT: sd a1, 24(sp)
; RV64I-NEXT: ld a4, 16(t5)
; RV64I-NEXT: ld a0, 8(t5)
; RV64I-NEXT: ld a1, 0(t5)
; RV64I-NEXT: ld a3, 24(t5)
; RV64I-NEXT: srli a5, a4, 56
; RV64I-NEXT: srli a6, a4, 48
; RV64I-NEXT: srli a7, a4, 40
; RV64I-NEXT: srli t0, a4, 32
; RV64I-NEXT: srli t1, a4, 24
; RV64I-NEXT: srli t2, a4, 16
; RV64I-NEXT: srli t3, a4, 8
; RV64I-NEXT: srli t4, a3, 56
; RV64I-NEXT: srli t5, a3, 48
; RV64I-NEXT: srli t6, a3, 40
; RV64I-NEXT: srli s0, a3, 32
; RV64I-NEXT: srli s1, a3, 24
; RV64I-NEXT: srli s2, a3, 16
; RV64I-NEXT: srli s3, a3, 8
; RV64I-NEXT: srli s4, a1, 56
; RV64I-NEXT: srli s5, a1, 48
; RV64I-NEXT: srli s6, a1, 40
; RV64I-NEXT: srli s7, a1, 32
; RV64I-NEXT: srli s8, a1, 24
; RV64I-NEXT: srli s9, a1, 16
; RV64I-NEXT: srli s10, a1, 8
; RV64I-NEXT: srli s11, a0, 56
; RV64I-NEXT: sb t0, 20(a2)
; RV64I-NEXT: sb a7, 21(a2)
; RV64I-NEXT: sb a6, 22(a2)
; RV64I-NEXT: sb a5, 23(a2)
; RV64I-NEXT: srli a5, a0, 48
; RV64I-NEXT: sb a4, 16(a2)
; RV64I-NEXT: sb t3, 17(a2)
; RV64I-NEXT: sb t2, 18(a2)
; RV64I-NEXT: sb t1, 19(a2)
; RV64I-NEXT: srli a4, a0, 40
; RV64I-NEXT: sb s0, 28(a2)
; RV64I-NEXT: sb t6, 29(a2)
; RV64I-NEXT: sb t5, 30(a2)
; RV64I-NEXT: sb t4, 31(a2)
; RV64I-NEXT: srli a6, a0, 32
; RV64I-NEXT: sb a3, 24(a2)
; RV64I-NEXT: sb s3, 25(a2)
; RV64I-NEXT: sb s2, 26(a2)
; RV64I-NEXT: sb s1, 27(a2)
; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: sb s7, 4(a2)
; RV64I-NEXT: sb s6, 5(a2)
; RV64I-NEXT: sb s5, 6(a2)
; RV64I-NEXT: sb s4, 7(a2)
; RV64I-NEXT: srli a7, a0, 16
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s10, 1(a2)
; RV64I-NEXT: sb s9, 2(a2)
; RV64I-NEXT: sb s8, 3(a2)
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: sb a6, 12(a2)
; RV64I-NEXT: sb a4, 13(a2)
; RV64I-NEXT: sb a5, 14(a2)
; RV64I-NEXT: sb s11, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a7, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_32bytes_dwordOff:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -128
; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a6, 0(a0)
; RV32I-NEXT: lbu t0, 1(a0)
; RV32I-NEXT: lbu t1, 2(a0)
; RV32I-NEXT: lbu t6, 3(a0)
; RV32I-NEXT: lbu s7, 4(a0)
; RV32I-NEXT: lbu s8, 5(a0)
; RV32I-NEXT: lbu s3, 6(a0)
; RV32I-NEXT: lbu s5, 7(a0)
; RV32I-NEXT: lbu s4, 8(a0)
; RV32I-NEXT: lbu s9, 9(a0)
; RV32I-NEXT: lbu s10, 10(a0)
; RV32I-NEXT: lbu s11, 11(a0)
; RV32I-NEXT: lbu s2, 12(a0)
; RV32I-NEXT: lbu s6, 13(a0)
; RV32I-NEXT: lbu a5, 14(a0)
; RV32I-NEXT: lbu a7, 15(a0)
; RV32I-NEXT: lbu a3, 16(a0)
; RV32I-NEXT: lbu t2, 17(a0)
; RV32I-NEXT: lbu t3, 18(a0)
; RV32I-NEXT: lbu t4, 19(a0)
; RV32I-NEXT: lbu a4, 20(a0)
; RV32I-NEXT: lbu t5, 21(a0)
; RV32I-NEXT: lbu s0, 22(a0)
; RV32I-NEXT: lbu s1, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: or a6, t0, a6
; RV32I-NEXT: or t0, t6, t1
; RV32I-NEXT: or t1, s8, s7
; RV32I-NEXT: lbu t6, 24(a0)
; RV32I-NEXT: lbu s7, 25(a0)
; RV32I-NEXT: lbu s8, 26(a0)
; RV32I-NEXT: lbu ra, 27(a0)
; RV32I-NEXT: slli s3, s3, 16
; RV32I-NEXT: slli s5, s5, 24
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli s10, s10, 16
; RV32I-NEXT: slli s11, s11, 24
; RV32I-NEXT: or s3, s5, s3
; RV32I-NEXT: or s4, s9, s4
; RV32I-NEXT: or s5, s11, s10
; RV32I-NEXT: lbu s9, 28(a0)
; RV32I-NEXT: lbu s10, 29(a0)
; RV32I-NEXT: lbu s11, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: slli s6, s6, 8
; RV32I-NEXT: or s2, s6, s2
; RV32I-NEXT: addi s6, sp, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: slli t2, t2, 8
; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli t4, t4, 24
; RV32I-NEXT: slli t5, t5, 8
; RV32I-NEXT: slli s0, s0, 16
; RV32I-NEXT: slli s1, s1, 24
; RV32I-NEXT: slli s7, s7, 8
; RV32I-NEXT: slli s8, s8, 16
; RV32I-NEXT: slli ra, ra, 24
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: or a5, a7, a5
; RV32I-NEXT: or a3, t2, a3
; RV32I-NEXT: or a7, t4, t3
; RV32I-NEXT: or a4, t5, a4
; RV32I-NEXT: or s0, s1, s0
; RV32I-NEXT: or t2, s7, t6
; RV32I-NEXT: or t3, ra, s8
; RV32I-NEXT: or t4, s10, s9
; RV32I-NEXT: or t5, a0, s11
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: or a6, t0, a6
; RV32I-NEXT: or t0, s3, t1
; RV32I-NEXT: or t1, s5, s4
; RV32I-NEXT: or a5, a5, s2
; RV32I-NEXT: or a3, a7, a3
; RV32I-NEXT: or a4, s0, a4
; RV32I-NEXT: or a7, t3, t2
; RV32I-NEXT: or t2, t5, t4
; RV32I-NEXT: sw a0, 56(sp)
; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 64(sp)
; RV32I-NEXT: sw a0, 68(sp)
; RV32I-NEXT: sw a0, 40(sp)
; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: sw a0, 48(sp)
; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: add s6, s6, a1
; RV32I-NEXT: sw a3, 24(sp)
; RV32I-NEXT: sw a4, 28(sp)
; RV32I-NEXT: sw a7, 32(sp)
; RV32I-NEXT: sw t2, 36(sp)
; RV32I-NEXT: sw a6, 8(sp)
; RV32I-NEXT: sw t0, 12(sp)
; RV32I-NEXT: sw t1, 16(sp)
; RV32I-NEXT: sw a5, 20(sp)
; RV32I-NEXT: lw a1, 0(s6)
; RV32I-NEXT: lw a0, 4(s6)
; RV32I-NEXT: lw a4, 8(s6)
; RV32I-NEXT: lw a3, 12(s6)
; RV32I-NEXT: lw a7, 24(s6)
; RV32I-NEXT: lw a5, 20(s6)
; RV32I-NEXT: lw a6, 16(s6)
; RV32I-NEXT: lw t0, 28(s6)
; RV32I-NEXT: srli t1, a7, 24
; RV32I-NEXT: srli t2, a7, 16
; RV32I-NEXT: srli t3, a7, 8
; RV32I-NEXT: srli t4, t0, 24
; RV32I-NEXT: srli t5, t0, 16
; RV32I-NEXT: srli t6, t0, 8
; RV32I-NEXT: srli s0, a6, 24
; RV32I-NEXT: srli s1, a6, 16
; RV32I-NEXT: srli s2, a6, 8
; RV32I-NEXT: srli s3, a5, 24
; RV32I-NEXT: srli s4, a5, 16
; RV32I-NEXT: srli s5, a5, 8
; RV32I-NEXT: srli s6, a4, 24
; RV32I-NEXT: srli s7, a4, 16
; RV32I-NEXT: srli s8, a4, 8
; RV32I-NEXT: srli s9, a3, 24
; RV32I-NEXT: srli s10, a3, 16
; RV32I-NEXT: srli s11, a3, 8
; RV32I-NEXT: srli ra, a1, 24
; RV32I-NEXT: sb a7, 24(a2)
; RV32I-NEXT: sb t3, 25(a2)
; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
; RV32I-NEXT: srli a7, a1, 16
; RV32I-NEXT: sb t0, 28(a2)
; RV32I-NEXT: sb t6, 29(a2)
; RV32I-NEXT: sb t5, 30(a2)
; RV32I-NEXT: sb t4, 31(a2)
; RV32I-NEXT: srli t0, a1, 8
; RV32I-NEXT: sb a6, 16(a2)
; RV32I-NEXT: sb s2, 17(a2)
; RV32I-NEXT: sb s1, 18(a2)
; RV32I-NEXT: sb s0, 19(a2)
; RV32I-NEXT: srli a6, a0, 24
; RV32I-NEXT: sb a5, 20(a2)
; RV32I-NEXT: sb s5, 21(a2)
; RV32I-NEXT: sb s4, 22(a2)
; RV32I-NEXT: sb s3, 23(a2)
; RV32I-NEXT: srli a5, a0, 16
; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb s8, 9(a2)
; RV32I-NEXT: sb s7, 10(a2)
; RV32I-NEXT: sb s6, 11(a2)
; RV32I-NEXT: srli a4, a0, 8
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb s11, 13(a2)
; RV32I-NEXT: sb s10, 14(a2)
; RV32I-NEXT: sb s9, 15(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t0, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb ra, 3(a2)
; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a5, 6(a2)
; RV32I-NEXT: sb a6, 7(a2)
; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 128
; RV32I-NEXT: ret
; Load the 256-bit operand and the doubleword shift count from unaligned memory.
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
; Convert doubleword units to bits: bitOff = dwordOff * 64.
%bitOff = shl i256 %dwordOff, 6
; Arithmetic (sign-extending) right shift, then store unaligned.
%res = ashr i256 %src, %bitOff
store i256 %res, ptr %dst, align 1
ret void
}