
Previously, clustered nodes were managed by adding weak edges between neighbouring cluster nodes, forming a sort of ordered queue, which was later recorded as `NextClusterPred` or `NextClusterSucc` in `ScheduleDAGMI`. However, instructions may not actually be picked in the exact order of that queue. For example, given a queue of cluster nodes A B C, node B might be picked first during scheduling; it is then very likely that only B and C get clustered for top-down scheduling, leaving A alone. Another issue is that ``` if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum) std::swap(SUa, SUb); if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) ``` may break the cluster queue. For example, suppose we want to cluster nodes (in `MemOpRecords` order) 1 3 2. Node 1 (SUa) normally becomes a pred of node 3 (SUb). But for the pair (3, 2), since 3 (SUa) > 2 (SUb), the two nodes are reordered, making 2 a pred of 3. Now both 1 and 2 are preds of 3, yet there is no edge between 1 and 2, so we end up with a broken cluster chain. To fix both issues, this change introduces an unordered set, which can also improve clustering in some hard cases. One key reason the change causes so many test-check updates is that the cluster candidates are no longer ordered, so they may be picked in a different order than before. The most affected targets are AMDGPU, AArch64, and RISCV. For RISCV, most changes appear to be minor instruction reorderings; I see no obvious regressions. For AArch64, some combining of ldr into ldp is affected, with two cases regressed and two improved. The deeper reason is that the machine scheduler could not cluster these well either before or after the change, and the later load-combine algorithm is also not smart enough. For AMDGPU, some cases use more v_dual instructions while others regress; this seems less critical. Test `v_vselect_v32bf16` appears to get more buffer_load instructions claused.
1959 lines
56 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=NOZBB,RV32I
|
|
; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=NOZBB,RV64I
|
|
; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=ZBB,RV32ZBB
|
|
; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=ZBB,RV64ZBB
|
|
|
|
;
|
|
; trunc(nabs(sub(zext(a),zext(b)))) -> nabds(a,b)
|
|
;
|
|
|
|
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i8:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: zext.b a1, a1
|
|
; RV32I-NEXT: zext.b a0, a0
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: srai a1, a0, 31
|
|
; RV32I-NEXT: xor a0, a0, a1
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i8:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: zext.b a0, a0
|
|
; RV64I-NEXT: zext.b a1, a1
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i8:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.b a1, a1
|
|
; RV32ZBB-NEXT: zext.b a0, a0
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i8:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: zext.b a0, a0
|
|
; RV64ZBB-NEXT: zext.b a1, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i8 %a to i64
|
|
%bext = zext i8 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i8
|
|
ret i8 %trunc
|
|
}
|
|
|
|
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i8_i16:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: slli a1, a1, 16
|
|
; RV32I-NEXT: srli a1, a1, 16
|
|
; RV32I-NEXT: zext.b a0, a0
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: srai a1, a0, 31
|
|
; RV32I-NEXT: xor a0, a0, a1
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i8_i16:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: zext.b a0, a0
|
|
; RV64I-NEXT: slli a1, a1, 48
|
|
; RV64I-NEXT: srli a1, a1, 48
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i8_i16:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.h a1, a1
|
|
; RV32ZBB-NEXT: zext.b a0, a0
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i8_i16:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: zext.b a0, a0
|
|
; RV64ZBB-NEXT: zext.h a1, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i8 %a to i64
|
|
%bext = zext i16 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i8
|
|
ret i8 %trunc
|
|
}
|
|
|
|
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i8_undef:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: zext.b a1, a1
|
|
; RV32I-NEXT: zext.b a0, a0
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: srai a1, a0, 31
|
|
; RV32I-NEXT: xor a0, a0, a1
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i8_undef:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: zext.b a0, a0
|
|
; RV64I-NEXT: zext.b a1, a1
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i8_undef:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.b a1, a1
|
|
; RV32ZBB-NEXT: zext.b a0, a0
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i8_undef:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: zext.b a0, a0
|
|
; RV64ZBB-NEXT: zext.b a1, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i8 %a to i64
|
|
%bext = zext i8 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i8
|
|
ret i8 %trunc
|
|
}
|
|
|
|
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i16:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: lui a2, 16
|
|
; RV32I-NEXT: addi a2, a2, -1
|
|
; RV32I-NEXT: and a1, a1, a2
|
|
; RV32I-NEXT: and a0, a0, a2
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: srai a1, a0, 31
|
|
; RV32I-NEXT: xor a0, a0, a1
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i16:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: lui a2, 16
|
|
; RV64I-NEXT: addi a2, a2, -1
|
|
; RV64I-NEXT: and a0, a0, a2
|
|
; RV64I-NEXT: and a1, a1, a2
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i16:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.h a1, a1
|
|
; RV32ZBB-NEXT: zext.h a0, a0
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i16:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: zext.h a0, a0
|
|
; RV64ZBB-NEXT: zext.h a1, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i16 %a to i64
|
|
%bext = zext i16 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i16
|
|
ret i16 %trunc
|
|
}
|
|
|
|
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i16_i32:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: slli a0, a0, 16
|
|
; RV32I-NEXT: srli a0, a0, 16
|
|
; RV32I-NEXT: bltu a1, a0, .LBB4_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
; RV32I-NEXT: .LBB4_2:
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i16_i32:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: slli a0, a0, 48
|
|
; RV64I-NEXT: slli a1, a1, 32
|
|
; RV64I-NEXT: srli a0, a0, 48
|
|
; RV64I-NEXT: srli a1, a1, 32
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i16_i32:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.h a0, a0
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i16_i32:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: zext.h a0, a0
|
|
; RV64ZBB-NEXT: slli a1, a1, 32
|
|
; RV64ZBB-NEXT: srli a1, a1, 32
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i16 %a to i64
|
|
%bext = zext i32 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i16
|
|
ret i16 %trunc
|
|
}
|
|
|
|
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i16_undef:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: lui a2, 16
|
|
; RV32I-NEXT: addi a2, a2, -1
|
|
; RV32I-NEXT: and a1, a1, a2
|
|
; RV32I-NEXT: and a0, a0, a2
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: srai a1, a0, 31
|
|
; RV32I-NEXT: xor a0, a0, a1
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i16_undef:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: lui a2, 16
|
|
; RV64I-NEXT: addi a2, a2, -1
|
|
; RV64I-NEXT: and a0, a0, a2
|
|
; RV64I-NEXT: and a1, a1, a2
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i16_undef:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.h a1, a1
|
|
; RV32ZBB-NEXT: zext.h a0, a0
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i16_undef:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: zext.h a0, a0
|
|
; RV64ZBB-NEXT: zext.h a1, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i16 %a to i64
|
|
%bext = zext i16 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i16
|
|
ret i16 %trunc
|
|
}
|
|
|
|
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i32:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: bltu a1, a0, .LBB6_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
; RV32I-NEXT: .LBB6_2:
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i32:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: slli a0, a0, 32
|
|
; RV64I-NEXT: slli a1, a1, 32
|
|
; RV64I-NEXT: srli a0, a0, 32
|
|
; RV64I-NEXT: srli a1, a1, 32
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i32:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i32:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: slli a0, a0, 32
|
|
; RV64ZBB-NEXT: slli a1, a1, 32
|
|
; RV64ZBB-NEXT: srli a0, a0, 32
|
|
; RV64ZBB-NEXT: srli a1, a1, 32
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i32 %a to i64
|
|
%bext = zext i32 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i32
|
|
ret i32 %trunc
|
|
}
|
|
|
|
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i32_i16:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: slli a1, a1, 16
|
|
; RV32I-NEXT: srli a1, a1, 16
|
|
; RV32I-NEXT: bltu a1, a0, .LBB7_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
; RV32I-NEXT: .LBB7_2:
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i32_i16:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: slli a0, a0, 32
|
|
; RV64I-NEXT: slli a1, a1, 48
|
|
; RV64I-NEXT: srli a0, a0, 32
|
|
; RV64I-NEXT: srli a1, a1, 48
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i32_i16:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: zext.h a1, a1
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i32_i16:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: slli a0, a0, 32
|
|
; RV64ZBB-NEXT: srli a0, a0, 32
|
|
; RV64ZBB-NEXT: zext.h a1, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i32 %a to i64
|
|
%bext = zext i16 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i32
|
|
ret i32 %trunc
|
|
}
|
|
|
|
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i32_undef:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: bltu a1, a0, .LBB8_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sub a0, a1, a0
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
; RV32I-NEXT: .LBB8_2:
|
|
; RV32I-NEXT: sub a0, a0, a1
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i32_undef:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: slli a0, a0, 32
|
|
; RV64I-NEXT: slli a1, a1, 32
|
|
; RV64I-NEXT: srli a0, a0, 32
|
|
; RV64I-NEXT: srli a1, a1, 32
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: srai a1, a0, 63
|
|
; RV64I-NEXT: xor a0, a0, a1
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i32_undef:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: maxu a2, a0, a1
|
|
; RV32ZBB-NEXT: minu a0, a0, a1
|
|
; RV32ZBB-NEXT: sub a0, a0, a2
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i32_undef:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: slli a0, a0, 32
|
|
; RV64ZBB-NEXT: slli a1, a1, 32
|
|
; RV64ZBB-NEXT: srli a0, a0, 32
|
|
; RV64ZBB-NEXT: srli a1, a1, 32
|
|
; RV64ZBB-NEXT: sub a0, a0, a1
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: min a0, a0, a1
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i32 %a to i64
|
|
%bext = zext i32 %b to i64
|
|
%sub = sub i64 %aext, %bext
|
|
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
|
|
%nabs = sub i64 0, %abs
|
|
%trunc = trunc i64 %nabs to i32
|
|
ret i32 %trunc
|
|
}
|
|
|
|
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i64:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: sltu a4, a0, a2
|
|
; RV32I-NEXT: sub a3, a1, a3
|
|
; RV32I-NEXT: sub a3, a3, a4
|
|
; RV32I-NEXT: sub a2, a0, a2
|
|
; RV32I-NEXT: beq a3, a1, .LBB9_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sltu a0, a1, a3
|
|
; RV32I-NEXT: j .LBB9_3
|
|
; RV32I-NEXT: .LBB9_2:
|
|
; RV32I-NEXT: sltu a0, a0, a2
|
|
; RV32I-NEXT: .LBB9_3:
|
|
; RV32I-NEXT: neg a1, a0
|
|
; RV32I-NEXT: xor a2, a2, a1
|
|
; RV32I-NEXT: xor a3, a3, a1
|
|
; RV32I-NEXT: sltu a1, a2, a1
|
|
; RV32I-NEXT: add a3, a3, a0
|
|
; RV32I-NEXT: add a0, a2, a0
|
|
; RV32I-NEXT: sub a3, a3, a1
|
|
; RV32I-NEXT: snez a1, a0
|
|
; RV32I-NEXT: add a1, a3, a1
|
|
; RV32I-NEXT: neg a1, a1
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i64:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: bltu a1, a0, .LBB9_2
|
|
; RV64I-NEXT: # %bb.1:
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: neg a0, a0
|
|
; RV64I-NEXT: ret
|
|
; RV64I-NEXT: .LBB9_2:
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: neg a0, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i64:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: sltu a4, a0, a2
|
|
; RV32ZBB-NEXT: sub a3, a1, a3
|
|
; RV32ZBB-NEXT: sub a3, a3, a4
|
|
; RV32ZBB-NEXT: sub a2, a0, a2
|
|
; RV32ZBB-NEXT: beq a3, a1, .LBB9_2
|
|
; RV32ZBB-NEXT: # %bb.1:
|
|
; RV32ZBB-NEXT: sltu a0, a1, a3
|
|
; RV32ZBB-NEXT: j .LBB9_3
|
|
; RV32ZBB-NEXT: .LBB9_2:
|
|
; RV32ZBB-NEXT: sltu a0, a0, a2
|
|
; RV32ZBB-NEXT: .LBB9_3:
|
|
; RV32ZBB-NEXT: neg a1, a0
|
|
; RV32ZBB-NEXT: xor a2, a2, a1
|
|
; RV32ZBB-NEXT: xor a3, a3, a1
|
|
; RV32ZBB-NEXT: sltu a1, a2, a1
|
|
; RV32ZBB-NEXT: add a3, a3, a0
|
|
; RV32ZBB-NEXT: add a0, a2, a0
|
|
; RV32ZBB-NEXT: sub a3, a3, a1
|
|
; RV32ZBB-NEXT: snez a1, a0
|
|
; RV32ZBB-NEXT: add a1, a3, a1
|
|
; RV32ZBB-NEXT: neg a1, a1
|
|
; RV32ZBB-NEXT: neg a0, a0
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i64:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: maxu a2, a0, a1
|
|
; RV64ZBB-NEXT: minu a0, a0, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a2
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i64 %a to i128
|
|
%bext = zext i64 %b to i128
|
|
%sub = sub i128 %aext, %bext
|
|
%abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
|
|
%nabs = sub i128 0, %abs
|
|
%trunc = trunc i128 %nabs to i64
|
|
ret i64 %trunc
|
|
}
|
|
|
|
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i64_undef:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: sltu a4, a0, a2
|
|
; RV32I-NEXT: sub a3, a1, a3
|
|
; RV32I-NEXT: sub a3, a3, a4
|
|
; RV32I-NEXT: sub a2, a0, a2
|
|
; RV32I-NEXT: beq a3, a1, .LBB10_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sltu a0, a1, a3
|
|
; RV32I-NEXT: j .LBB10_3
|
|
; RV32I-NEXT: .LBB10_2:
|
|
; RV32I-NEXT: sltu a0, a0, a2
|
|
; RV32I-NEXT: .LBB10_3:
|
|
; RV32I-NEXT: neg a1, a0
|
|
; RV32I-NEXT: xor a2, a2, a1
|
|
; RV32I-NEXT: xor a3, a3, a1
|
|
; RV32I-NEXT: sltu a1, a2, a1
|
|
; RV32I-NEXT: add a3, a3, a0
|
|
; RV32I-NEXT: add a0, a2, a0
|
|
; RV32I-NEXT: sub a3, a3, a1
|
|
; RV32I-NEXT: snez a1, a0
|
|
; RV32I-NEXT: add a1, a3, a1
|
|
; RV32I-NEXT: neg a1, a1
|
|
; RV32I-NEXT: neg a0, a0
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i64_undef:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: bltu a1, a0, .LBB10_2
|
|
; RV64I-NEXT: # %bb.1:
|
|
; RV64I-NEXT: sub a0, a1, a0
|
|
; RV64I-NEXT: neg a0, a0
|
|
; RV64I-NEXT: ret
|
|
; RV64I-NEXT: .LBB10_2:
|
|
; RV64I-NEXT: sub a0, a0, a1
|
|
; RV64I-NEXT: neg a0, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i64_undef:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: sltu a4, a0, a2
|
|
; RV32ZBB-NEXT: sub a3, a1, a3
|
|
; RV32ZBB-NEXT: sub a3, a3, a4
|
|
; RV32ZBB-NEXT: sub a2, a0, a2
|
|
; RV32ZBB-NEXT: beq a3, a1, .LBB10_2
|
|
; RV32ZBB-NEXT: # %bb.1:
|
|
; RV32ZBB-NEXT: sltu a0, a1, a3
|
|
; RV32ZBB-NEXT: j .LBB10_3
|
|
; RV32ZBB-NEXT: .LBB10_2:
|
|
; RV32ZBB-NEXT: sltu a0, a0, a2
|
|
; RV32ZBB-NEXT: .LBB10_3:
|
|
; RV32ZBB-NEXT: neg a1, a0
|
|
; RV32ZBB-NEXT: xor a2, a2, a1
|
|
; RV32ZBB-NEXT: xor a3, a3, a1
|
|
; RV32ZBB-NEXT: sltu a1, a2, a1
|
|
; RV32ZBB-NEXT: add a3, a3, a0
|
|
; RV32ZBB-NEXT: add a0, a2, a0
|
|
; RV32ZBB-NEXT: sub a3, a3, a1
|
|
; RV32ZBB-NEXT: snez a1, a0
|
|
; RV32ZBB-NEXT: add a1, a3, a1
|
|
; RV32ZBB-NEXT: neg a1, a1
|
|
; RV32ZBB-NEXT: neg a0, a0
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i64_undef:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: maxu a2, a0, a1
|
|
; RV64ZBB-NEXT: minu a0, a0, a1
|
|
; RV64ZBB-NEXT: sub a0, a0, a2
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i64 %a to i128
|
|
%bext = zext i64 %b to i128
|
|
%sub = sub i128 %aext, %bext
|
|
%abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
|
|
%nabs = sub i128 0, %abs
|
|
%trunc = trunc i128 %nabs to i64
|
|
ret i64 %trunc
|
|
}
|
|
|
|
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i128:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: lw a5, 0(a2)
|
|
; RV32I-NEXT: lw a7, 4(a2)
|
|
; RV32I-NEXT: lw a3, 8(a2)
|
|
; RV32I-NEXT: lw t1, 12(a2)
|
|
; RV32I-NEXT: lw a4, 8(a1)
|
|
; RV32I-NEXT: lw a6, 12(a1)
|
|
; RV32I-NEXT: lw a2, 0(a1)
|
|
; RV32I-NEXT: lw t0, 4(a1)
|
|
; RV32I-NEXT: sltu a1, a4, a3
|
|
; RV32I-NEXT: sub t1, a6, t1
|
|
; RV32I-NEXT: sltu t2, a2, a5
|
|
; RV32I-NEXT: sub a1, t1, a1
|
|
; RV32I-NEXT: mv t1, t2
|
|
; RV32I-NEXT: beq t0, a7, .LBB11_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sltu t1, t0, a7
|
|
; RV32I-NEXT: .LBB11_2:
|
|
; RV32I-NEXT: sub a3, a4, a3
|
|
; RV32I-NEXT: sltu t3, a3, t1
|
|
; RV32I-NEXT: sub a1, a1, t3
|
|
; RV32I-NEXT: sub a3, a3, t1
|
|
; RV32I-NEXT: beq a1, a6, .LBB11_4
|
|
; RV32I-NEXT: # %bb.3:
|
|
; RV32I-NEXT: sltu t1, a6, a1
|
|
; RV32I-NEXT: j .LBB11_5
|
|
; RV32I-NEXT: .LBB11_4:
|
|
; RV32I-NEXT: sltu t1, a4, a3
|
|
; RV32I-NEXT: .LBB11_5:
|
|
; RV32I-NEXT: sub a7, t0, a7
|
|
; RV32I-NEXT: sub a7, a7, t2
|
|
; RV32I-NEXT: sub a5, a2, a5
|
|
; RV32I-NEXT: beq a7, t0, .LBB11_7
|
|
; RV32I-NEXT: # %bb.6:
|
|
; RV32I-NEXT: sltu a2, t0, a7
|
|
; RV32I-NEXT: j .LBB11_8
|
|
; RV32I-NEXT: .LBB11_7:
|
|
; RV32I-NEXT: sltu a2, a2, a5
|
|
; RV32I-NEXT: .LBB11_8:
|
|
; RV32I-NEXT: xor a6, a1, a6
|
|
; RV32I-NEXT: xor a4, a3, a4
|
|
; RV32I-NEXT: or a4, a4, a6
|
|
; RV32I-NEXT: beqz a4, .LBB11_10
|
|
; RV32I-NEXT: # %bb.9:
|
|
; RV32I-NEXT: mv a2, t1
|
|
; RV32I-NEXT: .LBB11_10:
|
|
; RV32I-NEXT: neg a4, a2
|
|
; RV32I-NEXT: xor t0, a5, a4
|
|
; RV32I-NEXT: xor t3, a7, a4
|
|
; RV32I-NEXT: sltu a5, t0, a4
|
|
; RV32I-NEXT: add a6, t3, a2
|
|
; RV32I-NEXT: add t0, t0, a2
|
|
; RV32I-NEXT: sub t1, a6, a5
|
|
; RV32I-NEXT: snez a6, t1
|
|
; RV32I-NEXT: snez t2, t0
|
|
; RV32I-NEXT: or a6, t2, a6
|
|
; RV32I-NEXT: beqz a7, .LBB11_12
|
|
; RV32I-NEXT: # %bb.11:
|
|
; RV32I-NEXT: sltu a5, t3, a4
|
|
; RV32I-NEXT: .LBB11_12:
|
|
; RV32I-NEXT: xor a3, a3, a4
|
|
; RV32I-NEXT: xor a1, a1, a4
|
|
; RV32I-NEXT: add t1, t1, t2
|
|
; RV32I-NEXT: neg a7, t0
|
|
; RV32I-NEXT: add t0, a3, a2
|
|
; RV32I-NEXT: sltu a3, a3, a4
|
|
; RV32I-NEXT: add a1, a1, a2
|
|
; RV32I-NEXT: neg a2, t1
|
|
; RV32I-NEXT: sub a4, t0, a5
|
|
; RV32I-NEXT: sub a1, a1, a3
|
|
; RV32I-NEXT: sltu a3, t0, a5
|
|
; RV32I-NEXT: neg a5, a4
|
|
; RV32I-NEXT: sub a1, a1, a3
|
|
; RV32I-NEXT: snez a3, a4
|
|
; RV32I-NEXT: sltu a4, a5, a6
|
|
; RV32I-NEXT: add a1, a1, a3
|
|
; RV32I-NEXT: sub a3, a5, a6
|
|
; RV32I-NEXT: neg a1, a1
|
|
; RV32I-NEXT: sub a1, a1, a4
|
|
; RV32I-NEXT: sw a7, 0(a0)
|
|
; RV32I-NEXT: sw a2, 4(a0)
|
|
; RV32I-NEXT: sw a3, 8(a0)
|
|
; RV32I-NEXT: sw a1, 12(a0)
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i128:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: sltu a4, a0, a2
|
|
; RV64I-NEXT: sub a3, a1, a3
|
|
; RV64I-NEXT: sub a3, a3, a4
|
|
; RV64I-NEXT: sub a2, a0, a2
|
|
; RV64I-NEXT: beq a3, a1, .LBB11_2
|
|
; RV64I-NEXT: # %bb.1:
|
|
; RV64I-NEXT: sltu a0, a1, a3
|
|
; RV64I-NEXT: j .LBB11_3
|
|
; RV64I-NEXT: .LBB11_2:
|
|
; RV64I-NEXT: sltu a0, a0, a2
|
|
; RV64I-NEXT: .LBB11_3:
|
|
; RV64I-NEXT: neg a1, a0
|
|
; RV64I-NEXT: xor a2, a2, a1
|
|
; RV64I-NEXT: xor a3, a3, a1
|
|
; RV64I-NEXT: sltu a1, a2, a1
|
|
; RV64I-NEXT: add a3, a3, a0
|
|
; RV64I-NEXT: add a0, a2, a0
|
|
; RV64I-NEXT: sub a3, a3, a1
|
|
; RV64I-NEXT: snez a1, a0
|
|
; RV64I-NEXT: add a1, a3, a1
|
|
; RV64I-NEXT: neg a1, a1
|
|
; RV64I-NEXT: neg a0, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i128:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: lw a5, 0(a2)
|
|
; RV32ZBB-NEXT: lw a7, 4(a2)
|
|
; RV32ZBB-NEXT: lw a3, 8(a2)
|
|
; RV32ZBB-NEXT: lw t1, 12(a2)
|
|
; RV32ZBB-NEXT: lw a4, 8(a1)
|
|
; RV32ZBB-NEXT: lw a6, 12(a1)
|
|
; RV32ZBB-NEXT: lw a2, 0(a1)
|
|
; RV32ZBB-NEXT: lw t0, 4(a1)
|
|
; RV32ZBB-NEXT: sltu a1, a4, a3
|
|
; RV32ZBB-NEXT: sub t1, a6, t1
|
|
; RV32ZBB-NEXT: sltu t2, a2, a5
|
|
; RV32ZBB-NEXT: sub a1, t1, a1
|
|
; RV32ZBB-NEXT: mv t1, t2
|
|
; RV32ZBB-NEXT: beq t0, a7, .LBB11_2
|
|
; RV32ZBB-NEXT: # %bb.1:
|
|
; RV32ZBB-NEXT: sltu t1, t0, a7
|
|
; RV32ZBB-NEXT: .LBB11_2:
|
|
; RV32ZBB-NEXT: sub a3, a4, a3
|
|
; RV32ZBB-NEXT: sltu t3, a3, t1
|
|
; RV32ZBB-NEXT: sub a1, a1, t3
|
|
; RV32ZBB-NEXT: sub a3, a3, t1
|
|
; RV32ZBB-NEXT: beq a1, a6, .LBB11_4
|
|
; RV32ZBB-NEXT: # %bb.3:
|
|
; RV32ZBB-NEXT: sltu t1, a6, a1
|
|
; RV32ZBB-NEXT: j .LBB11_5
|
|
; RV32ZBB-NEXT: .LBB11_4:
|
|
; RV32ZBB-NEXT: sltu t1, a4, a3
|
|
; RV32ZBB-NEXT: .LBB11_5:
|
|
; RV32ZBB-NEXT: sub a7, t0, a7
|
|
; RV32ZBB-NEXT: sub a7, a7, t2
|
|
; RV32ZBB-NEXT: sub a5, a2, a5
|
|
; RV32ZBB-NEXT: beq a7, t0, .LBB11_7
|
|
; RV32ZBB-NEXT: # %bb.6:
|
|
; RV32ZBB-NEXT: sltu a2, t0, a7
|
|
; RV32ZBB-NEXT: j .LBB11_8
|
|
; RV32ZBB-NEXT: .LBB11_7:
|
|
; RV32ZBB-NEXT: sltu a2, a2, a5
|
|
; RV32ZBB-NEXT: .LBB11_8:
|
|
; RV32ZBB-NEXT: xor a6, a1, a6
|
|
; RV32ZBB-NEXT: xor a4, a3, a4
|
|
; RV32ZBB-NEXT: or a4, a4, a6
|
|
; RV32ZBB-NEXT: beqz a4, .LBB11_10
|
|
; RV32ZBB-NEXT: # %bb.9:
|
|
; RV32ZBB-NEXT: mv a2, t1
|
|
; RV32ZBB-NEXT: .LBB11_10:
|
|
; RV32ZBB-NEXT: neg a4, a2
|
|
; RV32ZBB-NEXT: xor t0, a5, a4
|
|
; RV32ZBB-NEXT: xor t3, a7, a4
|
|
; RV32ZBB-NEXT: sltu a5, t0, a4
|
|
; RV32ZBB-NEXT: add a6, t3, a2
|
|
; RV32ZBB-NEXT: add t0, t0, a2
|
|
; RV32ZBB-NEXT: sub t1, a6, a5
|
|
; RV32ZBB-NEXT: snez a6, t1
|
|
; RV32ZBB-NEXT: snez t2, t0
|
|
; RV32ZBB-NEXT: or a6, t2, a6
|
|
; RV32ZBB-NEXT: beqz a7, .LBB11_12
|
|
; RV32ZBB-NEXT: # %bb.11:
|
|
; RV32ZBB-NEXT: sltu a5, t3, a4
|
|
; RV32ZBB-NEXT: .LBB11_12:
|
|
; RV32ZBB-NEXT: xor a3, a3, a4
|
|
; RV32ZBB-NEXT: xor a1, a1, a4
|
|
; RV32ZBB-NEXT: add t1, t1, t2
|
|
; RV32ZBB-NEXT: neg a7, t0
|
|
; RV32ZBB-NEXT: add t0, a3, a2
|
|
; RV32ZBB-NEXT: sltu a3, a3, a4
|
|
; RV32ZBB-NEXT: add a1, a1, a2
|
|
; RV32ZBB-NEXT: neg a2, t1
|
|
; RV32ZBB-NEXT: sub a4, t0, a5
|
|
; RV32ZBB-NEXT: sub a1, a1, a3
|
|
; RV32ZBB-NEXT: sltu a3, t0, a5
|
|
; RV32ZBB-NEXT: neg a5, a4
|
|
; RV32ZBB-NEXT: sub a1, a1, a3
|
|
; RV32ZBB-NEXT: snez a3, a4
|
|
; RV32ZBB-NEXT: sltu a4, a5, a6
|
|
; RV32ZBB-NEXT: add a1, a1, a3
|
|
; RV32ZBB-NEXT: sub a3, a5, a6
|
|
; RV32ZBB-NEXT: neg a1, a1
|
|
; RV32ZBB-NEXT: sub a1, a1, a4
|
|
; RV32ZBB-NEXT: sw a7, 0(a0)
|
|
; RV32ZBB-NEXT: sw a2, 4(a0)
|
|
; RV32ZBB-NEXT: sw a3, 8(a0)
|
|
; RV32ZBB-NEXT: sw a1, 12(a0)
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i128:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: sltu a4, a0, a2
|
|
; RV64ZBB-NEXT: sub a3, a1, a3
|
|
; RV64ZBB-NEXT: sub a3, a3, a4
|
|
; RV64ZBB-NEXT: sub a2, a0, a2
|
|
; RV64ZBB-NEXT: beq a3, a1, .LBB11_2
|
|
; RV64ZBB-NEXT: # %bb.1:
|
|
; RV64ZBB-NEXT: sltu a0, a1, a3
|
|
; RV64ZBB-NEXT: j .LBB11_3
|
|
; RV64ZBB-NEXT: .LBB11_2:
|
|
; RV64ZBB-NEXT: sltu a0, a0, a2
|
|
; RV64ZBB-NEXT: .LBB11_3:
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: xor a2, a2, a1
|
|
; RV64ZBB-NEXT: xor a3, a3, a1
|
|
; RV64ZBB-NEXT: sltu a1, a2, a1
|
|
; RV64ZBB-NEXT: add a3, a3, a0
|
|
; RV64ZBB-NEXT: add a0, a2, a0
|
|
; RV64ZBB-NEXT: sub a3, a3, a1
|
|
; RV64ZBB-NEXT: snez a1, a0
|
|
; RV64ZBB-NEXT: add a1, a3, a1
|
|
; RV64ZBB-NEXT: neg a1, a1
|
|
; RV64ZBB-NEXT: neg a0, a0
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i128 %a to i256
|
|
%bext = zext i128 %b to i256
|
|
%sub = sub i256 %aext, %bext
|
|
%abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
|
|
%nabs = sub i256 0, %abs
|
|
%trunc = trunc i256 %nabs to i128
|
|
ret i128 %trunc
|
|
}
|
|
|
|
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
|
|
; RV32I-LABEL: abd_ext_i128_undef:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: lw a5, 0(a2)
|
|
; RV32I-NEXT: lw a7, 4(a2)
|
|
; RV32I-NEXT: lw a3, 8(a2)
|
|
; RV32I-NEXT: lw t1, 12(a2)
|
|
; RV32I-NEXT: lw a4, 8(a1)
|
|
; RV32I-NEXT: lw a6, 12(a1)
|
|
; RV32I-NEXT: lw a2, 0(a1)
|
|
; RV32I-NEXT: lw t0, 4(a1)
|
|
; RV32I-NEXT: sltu a1, a4, a3
|
|
; RV32I-NEXT: sub t1, a6, t1
|
|
; RV32I-NEXT: sltu t2, a2, a5
|
|
; RV32I-NEXT: sub a1, t1, a1
|
|
; RV32I-NEXT: mv t1, t2
|
|
; RV32I-NEXT: beq t0, a7, .LBB12_2
|
|
; RV32I-NEXT: # %bb.1:
|
|
; RV32I-NEXT: sltu t1, t0, a7
|
|
; RV32I-NEXT: .LBB12_2:
|
|
; RV32I-NEXT: sub a3, a4, a3
|
|
; RV32I-NEXT: sltu t3, a3, t1
|
|
; RV32I-NEXT: sub a1, a1, t3
|
|
; RV32I-NEXT: sub a3, a3, t1
|
|
; RV32I-NEXT: beq a1, a6, .LBB12_4
|
|
; RV32I-NEXT: # %bb.3:
|
|
; RV32I-NEXT: sltu t1, a6, a1
|
|
; RV32I-NEXT: j .LBB12_5
|
|
; RV32I-NEXT: .LBB12_4:
|
|
; RV32I-NEXT: sltu t1, a4, a3
|
|
; RV32I-NEXT: .LBB12_5:
|
|
; RV32I-NEXT: sub a7, t0, a7
|
|
; RV32I-NEXT: sub a7, a7, t2
|
|
; RV32I-NEXT: sub a5, a2, a5
|
|
; RV32I-NEXT: beq a7, t0, .LBB12_7
|
|
; RV32I-NEXT: # %bb.6:
|
|
; RV32I-NEXT: sltu a2, t0, a7
|
|
; RV32I-NEXT: j .LBB12_8
|
|
; RV32I-NEXT: .LBB12_7:
|
|
; RV32I-NEXT: sltu a2, a2, a5
|
|
; RV32I-NEXT: .LBB12_8:
|
|
; RV32I-NEXT: xor a6, a1, a6
|
|
; RV32I-NEXT: xor a4, a3, a4
|
|
; RV32I-NEXT: or a4, a4, a6
|
|
; RV32I-NEXT: beqz a4, .LBB12_10
|
|
; RV32I-NEXT: # %bb.9:
|
|
; RV32I-NEXT: mv a2, t1
|
|
; RV32I-NEXT: .LBB12_10:
|
|
; RV32I-NEXT: neg a4, a2
|
|
; RV32I-NEXT: xor t0, a5, a4
|
|
; RV32I-NEXT: xor t3, a7, a4
|
|
; RV32I-NEXT: sltu a5, t0, a4
|
|
; RV32I-NEXT: add a6, t3, a2
|
|
; RV32I-NEXT: add t0, t0, a2
|
|
; RV32I-NEXT: sub t1, a6, a5
|
|
; RV32I-NEXT: snez a6, t1
|
|
; RV32I-NEXT: snez t2, t0
|
|
; RV32I-NEXT: or a6, t2, a6
|
|
; RV32I-NEXT: beqz a7, .LBB12_12
|
|
; RV32I-NEXT: # %bb.11:
|
|
; RV32I-NEXT: sltu a5, t3, a4
|
|
; RV32I-NEXT: .LBB12_12:
|
|
; RV32I-NEXT: xor a3, a3, a4
|
|
; RV32I-NEXT: xor a1, a1, a4
|
|
; RV32I-NEXT: add t1, t1, t2
|
|
; RV32I-NEXT: neg a7, t0
|
|
; RV32I-NEXT: add t0, a3, a2
|
|
; RV32I-NEXT: sltu a3, a3, a4
|
|
; RV32I-NEXT: add a1, a1, a2
|
|
; RV32I-NEXT: neg a2, t1
|
|
; RV32I-NEXT: sub a4, t0, a5
|
|
; RV32I-NEXT: sub a1, a1, a3
|
|
; RV32I-NEXT: sltu a3, t0, a5
|
|
; RV32I-NEXT: neg a5, a4
|
|
; RV32I-NEXT: sub a1, a1, a3
|
|
; RV32I-NEXT: snez a3, a4
|
|
; RV32I-NEXT: sltu a4, a5, a6
|
|
; RV32I-NEXT: add a1, a1, a3
|
|
; RV32I-NEXT: sub a3, a5, a6
|
|
; RV32I-NEXT: neg a1, a1
|
|
; RV32I-NEXT: sub a1, a1, a4
|
|
; RV32I-NEXT: sw a7, 0(a0)
|
|
; RV32I-NEXT: sw a2, 4(a0)
|
|
; RV32I-NEXT: sw a3, 8(a0)
|
|
; RV32I-NEXT: sw a1, 12(a0)
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: abd_ext_i128_undef:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: sltu a4, a0, a2
|
|
; RV64I-NEXT: sub a3, a1, a3
|
|
; RV64I-NEXT: sub a3, a3, a4
|
|
; RV64I-NEXT: sub a2, a0, a2
|
|
; RV64I-NEXT: beq a3, a1, .LBB12_2
|
|
; RV64I-NEXT: # %bb.1:
|
|
; RV64I-NEXT: sltu a0, a1, a3
|
|
; RV64I-NEXT: j .LBB12_3
|
|
; RV64I-NEXT: .LBB12_2:
|
|
; RV64I-NEXT: sltu a0, a0, a2
|
|
; RV64I-NEXT: .LBB12_3:
|
|
; RV64I-NEXT: neg a1, a0
|
|
; RV64I-NEXT: xor a2, a2, a1
|
|
; RV64I-NEXT: xor a3, a3, a1
|
|
; RV64I-NEXT: sltu a1, a2, a1
|
|
; RV64I-NEXT: add a3, a3, a0
|
|
; RV64I-NEXT: add a0, a2, a0
|
|
; RV64I-NEXT: sub a3, a3, a1
|
|
; RV64I-NEXT: snez a1, a0
|
|
; RV64I-NEXT: add a1, a3, a1
|
|
; RV64I-NEXT: neg a1, a1
|
|
; RV64I-NEXT: neg a0, a0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32ZBB-LABEL: abd_ext_i128_undef:
|
|
; RV32ZBB: # %bb.0:
|
|
; RV32ZBB-NEXT: lw a5, 0(a2)
|
|
; RV32ZBB-NEXT: lw a7, 4(a2)
|
|
; RV32ZBB-NEXT: lw a3, 8(a2)
|
|
; RV32ZBB-NEXT: lw t1, 12(a2)
|
|
; RV32ZBB-NEXT: lw a4, 8(a1)
|
|
; RV32ZBB-NEXT: lw a6, 12(a1)
|
|
; RV32ZBB-NEXT: lw a2, 0(a1)
|
|
; RV32ZBB-NEXT: lw t0, 4(a1)
|
|
; RV32ZBB-NEXT: sltu a1, a4, a3
|
|
; RV32ZBB-NEXT: sub t1, a6, t1
|
|
; RV32ZBB-NEXT: sltu t2, a2, a5
|
|
; RV32ZBB-NEXT: sub a1, t1, a1
|
|
; RV32ZBB-NEXT: mv t1, t2
|
|
; RV32ZBB-NEXT: beq t0, a7, .LBB12_2
|
|
; RV32ZBB-NEXT: # %bb.1:
|
|
; RV32ZBB-NEXT: sltu t1, t0, a7
|
|
; RV32ZBB-NEXT: .LBB12_2:
|
|
; RV32ZBB-NEXT: sub a3, a4, a3
|
|
; RV32ZBB-NEXT: sltu t3, a3, t1
|
|
; RV32ZBB-NEXT: sub a1, a1, t3
|
|
; RV32ZBB-NEXT: sub a3, a3, t1
|
|
; RV32ZBB-NEXT: beq a1, a6, .LBB12_4
|
|
; RV32ZBB-NEXT: # %bb.3:
|
|
; RV32ZBB-NEXT: sltu t1, a6, a1
|
|
; RV32ZBB-NEXT: j .LBB12_5
|
|
; RV32ZBB-NEXT: .LBB12_4:
|
|
; RV32ZBB-NEXT: sltu t1, a4, a3
|
|
; RV32ZBB-NEXT: .LBB12_5:
|
|
; RV32ZBB-NEXT: sub a7, t0, a7
|
|
; RV32ZBB-NEXT: sub a7, a7, t2
|
|
; RV32ZBB-NEXT: sub a5, a2, a5
|
|
; RV32ZBB-NEXT: beq a7, t0, .LBB12_7
|
|
; RV32ZBB-NEXT: # %bb.6:
|
|
; RV32ZBB-NEXT: sltu a2, t0, a7
|
|
; RV32ZBB-NEXT: j .LBB12_8
|
|
; RV32ZBB-NEXT: .LBB12_7:
|
|
; RV32ZBB-NEXT: sltu a2, a2, a5
|
|
; RV32ZBB-NEXT: .LBB12_8:
|
|
; RV32ZBB-NEXT: xor a6, a1, a6
|
|
; RV32ZBB-NEXT: xor a4, a3, a4
|
|
; RV32ZBB-NEXT: or a4, a4, a6
|
|
; RV32ZBB-NEXT: beqz a4, .LBB12_10
|
|
; RV32ZBB-NEXT: # %bb.9:
|
|
; RV32ZBB-NEXT: mv a2, t1
|
|
; RV32ZBB-NEXT: .LBB12_10:
|
|
; RV32ZBB-NEXT: neg a4, a2
|
|
; RV32ZBB-NEXT: xor t0, a5, a4
|
|
; RV32ZBB-NEXT: xor t3, a7, a4
|
|
; RV32ZBB-NEXT: sltu a5, t0, a4
|
|
; RV32ZBB-NEXT: add a6, t3, a2
|
|
; RV32ZBB-NEXT: add t0, t0, a2
|
|
; RV32ZBB-NEXT: sub t1, a6, a5
|
|
; RV32ZBB-NEXT: snez a6, t1
|
|
; RV32ZBB-NEXT: snez t2, t0
|
|
; RV32ZBB-NEXT: or a6, t2, a6
|
|
; RV32ZBB-NEXT: beqz a7, .LBB12_12
|
|
; RV32ZBB-NEXT: # %bb.11:
|
|
; RV32ZBB-NEXT: sltu a5, t3, a4
|
|
; RV32ZBB-NEXT: .LBB12_12:
|
|
; RV32ZBB-NEXT: xor a3, a3, a4
|
|
; RV32ZBB-NEXT: xor a1, a1, a4
|
|
; RV32ZBB-NEXT: add t1, t1, t2
|
|
; RV32ZBB-NEXT: neg a7, t0
|
|
; RV32ZBB-NEXT: add t0, a3, a2
|
|
; RV32ZBB-NEXT: sltu a3, a3, a4
|
|
; RV32ZBB-NEXT: add a1, a1, a2
|
|
; RV32ZBB-NEXT: neg a2, t1
|
|
; RV32ZBB-NEXT: sub a4, t0, a5
|
|
; RV32ZBB-NEXT: sub a1, a1, a3
|
|
; RV32ZBB-NEXT: sltu a3, t0, a5
|
|
; RV32ZBB-NEXT: neg a5, a4
|
|
; RV32ZBB-NEXT: sub a1, a1, a3
|
|
; RV32ZBB-NEXT: snez a3, a4
|
|
; RV32ZBB-NEXT: sltu a4, a5, a6
|
|
; RV32ZBB-NEXT: add a1, a1, a3
|
|
; RV32ZBB-NEXT: sub a3, a5, a6
|
|
; RV32ZBB-NEXT: neg a1, a1
|
|
; RV32ZBB-NEXT: sub a1, a1, a4
|
|
; RV32ZBB-NEXT: sw a7, 0(a0)
|
|
; RV32ZBB-NEXT: sw a2, 4(a0)
|
|
; RV32ZBB-NEXT: sw a3, 8(a0)
|
|
; RV32ZBB-NEXT: sw a1, 12(a0)
|
|
; RV32ZBB-NEXT: ret
|
|
;
|
|
; RV64ZBB-LABEL: abd_ext_i128_undef:
|
|
; RV64ZBB: # %bb.0:
|
|
; RV64ZBB-NEXT: sltu a4, a0, a2
|
|
; RV64ZBB-NEXT: sub a3, a1, a3
|
|
; RV64ZBB-NEXT: sub a3, a3, a4
|
|
; RV64ZBB-NEXT: sub a2, a0, a2
|
|
; RV64ZBB-NEXT: beq a3, a1, .LBB12_2
|
|
; RV64ZBB-NEXT: # %bb.1:
|
|
; RV64ZBB-NEXT: sltu a0, a1, a3
|
|
; RV64ZBB-NEXT: j .LBB12_3
|
|
; RV64ZBB-NEXT: .LBB12_2:
|
|
; RV64ZBB-NEXT: sltu a0, a0, a2
|
|
; RV64ZBB-NEXT: .LBB12_3:
|
|
; RV64ZBB-NEXT: neg a1, a0
|
|
; RV64ZBB-NEXT: xor a2, a2, a1
|
|
; RV64ZBB-NEXT: xor a3, a3, a1
|
|
; RV64ZBB-NEXT: sltu a1, a2, a1
|
|
; RV64ZBB-NEXT: add a3, a3, a0
|
|
; RV64ZBB-NEXT: add a0, a2, a0
|
|
; RV64ZBB-NEXT: sub a3, a3, a1
|
|
; RV64ZBB-NEXT: snez a1, a0
|
|
; RV64ZBB-NEXT: add a1, a3, a1
|
|
; RV64ZBB-NEXT: neg a1, a1
|
|
; RV64ZBB-NEXT: neg a0, a0
|
|
; RV64ZBB-NEXT: ret
|
|
%aext = zext i128 %a to i256
|
|
%bext = zext i128 %b to i256
|
|
%sub = sub i256 %aext, %bext
|
|
%abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
|
|
%nabs = sub i256 0, %abs
|
|
%trunc = trunc i256 %nabs to i128
|
|
ret i128 %trunc
|
|
}
|
|
|
|
;
|
|
; sub(umin(a,b),umax(a,b)) -> nabds(a,b)
|
|
;
|
|
|
|
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; Negated absolute difference via sub(umin(a,b), umax(a,b)) for i8.
; NOTE(review): CHECK lines appear machine-generated (update_llc_test_checks.py
; style) -- regenerate with the script instead of editing them by hand.
; NOZBB-LABEL: abd_minmax_i8:
; NOZBB: # %bb.0:
; NOZBB-NEXT: zext.b a1, a1
; NOZBB-NEXT: zext.b a0, a0
; NOZBB-NEXT: mv a2, a0
; NOZBB-NEXT: bgeu a0, a1, .LBB13_3
; NOZBB-NEXT: # %bb.1:
; NOZBB-NEXT: bgeu a1, a0, .LBB13_4
; NOZBB-NEXT: .LBB13_2:
; NOZBB-NEXT: sub a0, a2, a0
; NOZBB-NEXT: ret
; NOZBB-NEXT: .LBB13_3:
; NOZBB-NEXT: mv a2, a1
; NOZBB-NEXT: bltu a1, a0, .LBB13_2
; NOZBB-NEXT: .LBB13_4:
; NOZBB-NEXT: sub a0, a2, a1
; NOZBB-NEXT: ret
;
; ZBB-LABEL: abd_minmax_i8:
; ZBB: # %bb.0:
; ZBB-NEXT: zext.b a1, a1
; ZBB-NEXT: zext.b a0, a0
; ZBB-NEXT: minu a2, a0, a1
; ZBB-NEXT: maxu a0, a0, a1
; ZBB-NEXT: sub a0, a2, a0
; ZBB-NEXT: ret
  %min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
  %max = call i8 @llvm.umax.i8(i8 %a, i8 %b)
  %sub = sub i8 %min, %max
  ret i8 %sub
}
|
|
|
|
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; Negated absolute difference via sub(umin(a,b), umax(a,b)) for i16.
; Without Zbb the i16 zero-extension is done with a lui/addi/and mask;
; with Zbb it uses zext.h and minu/maxu.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; NOZBB-LABEL: abd_minmax_i16:
; NOZBB: # %bb.0:
; NOZBB-NEXT: lui a2, 16
; NOZBB-NEXT: addi a2, a2, -1
; NOZBB-NEXT: and a1, a1, a2
; NOZBB-NEXT: and a0, a0, a2
; NOZBB-NEXT: mv a2, a0
; NOZBB-NEXT: bgeu a0, a1, .LBB14_3
; NOZBB-NEXT: # %bb.1:
; NOZBB-NEXT: bgeu a1, a0, .LBB14_4
; NOZBB-NEXT: .LBB14_2:
; NOZBB-NEXT: sub a0, a2, a0
; NOZBB-NEXT: ret
; NOZBB-NEXT: .LBB14_3:
; NOZBB-NEXT: mv a2, a1
; NOZBB-NEXT: bltu a1, a0, .LBB14_2
; NOZBB-NEXT: .LBB14_4:
; NOZBB-NEXT: sub a0, a2, a1
; NOZBB-NEXT: ret
;
; ZBB-LABEL: abd_minmax_i16:
; ZBB: # %bb.0:
; ZBB-NEXT: zext.h a1, a1
; ZBB-NEXT: zext.h a0, a0
; ZBB-NEXT: minu a2, a0, a1
; ZBB-NEXT: maxu a0, a0, a1
; ZBB-NEXT: sub a0, a2, a0
; ZBB-NEXT: ret
  %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
  %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
  %sub = sub i16 %min, %max
  ret i16 %sub
}
|
|
|
|
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; Negated absolute difference via sub(umin(a,b), umax(a,b)) for i32.
; RV64 variants sign-extend the 32-bit inputs (sext.w) and use subw.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; RV32I-LABEL: abd_minmax_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: bgeu a0, a1, .LBB15_3
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: bgeu a1, a0, .LBB15_4
; RV32I-NEXT: .LBB15_2:
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB15_3:
; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: bltu a1, a0, .LBB15_2
; RV32I-NEXT: .LBB15_4:
; RV32I-NEXT: sub a0, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: bgeu a0, a1, .LBB15_3
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: bgeu a1, a0, .LBB15_4
; RV64I-NEXT: .LBB15_2:
; RV64I-NEXT: subw a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB15_3:
; RV64I-NEXT: mv a2, a1
; RV64I-NEXT: bltu a1, a0, .LBB15_2
; RV64I-NEXT: .LBB15_4:
; RV64I-NEXT: subw a0, a2, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_minmax_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: minu a2, a0, a1
; RV32ZBB-NEXT: maxu a0, a0, a1
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_minmax_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: minu a2, a0, a1
; RV64ZBB-NEXT: maxu a0, a0, a1
; RV64ZBB-NEXT: subw a0, a2, a0
; RV64ZBB-NEXT: ret
  %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %sub = sub i32 %min, %max
  ret i32 %sub
}
|
|
|
|
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; Negated absolute difference via sub(umin(a,b), umax(a,b)) for i64.
; On RV32 the value is split across two registers, so the umin/umax each
; need a high-word/low-word compare sequence; RV64ZBB keeps minu/maxu/sub.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; RV32I-LABEL: abd_minmax_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: beq a1, a3, .LBB16_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a6, a1, a3
; RV32I-NEXT: j .LBB16_3
; RV32I-NEXT: .LBB16_2:
; RV32I-NEXT: sltu a6, a0, a2
; RV32I-NEXT: .LBB16_3:
; RV32I-NEXT: mv a4, a1
; RV32I-NEXT: mv a5, a0
; RV32I-NEXT: bnez a6, .LBB16_5
; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a4, a3
; RV32I-NEXT: mv a5, a2
; RV32I-NEXT: .LBB16_5:
; RV32I-NEXT: beq a1, a3, .LBB16_7
; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: sltu a6, a3, a1
; RV32I-NEXT: beqz a6, .LBB16_8
; RV32I-NEXT: j .LBB16_9
; RV32I-NEXT: .LBB16_7:
; RV32I-NEXT: sltu a6, a2, a0
; RV32I-NEXT: bnez a6, .LBB16_9
; RV32I-NEXT: .LBB16_8:
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB16_9:
; RV32I-NEXT: sltu a2, a5, a0
; RV32I-NEXT: sub a1, a4, a1
; RV32I-NEXT: sub a1, a1, a2
; RV32I-NEXT: sub a0, a5, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: bgeu a0, a1, .LBB16_3
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: bgeu a1, a0, .LBB16_4
; RV64I-NEXT: .LBB16_2:
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB16_3:
; RV64I-NEXT: mv a2, a1
; RV64I-NEXT: bltu a1, a0, .LBB16_2
; RV64I-NEXT: .LBB16_4:
; RV64I-NEXT: sub a0, a2, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_minmax_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: beq a1, a3, .LBB16_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu a6, a1, a3
; RV32ZBB-NEXT: j .LBB16_3
; RV32ZBB-NEXT: .LBB16_2:
; RV32ZBB-NEXT: sltu a6, a0, a2
; RV32ZBB-NEXT: .LBB16_3:
; RV32ZBB-NEXT: mv a4, a1
; RV32ZBB-NEXT: mv a5, a0
; RV32ZBB-NEXT: bnez a6, .LBB16_5
; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: mv a4, a3
; RV32ZBB-NEXT: mv a5, a2
; RV32ZBB-NEXT: .LBB16_5:
; RV32ZBB-NEXT: beq a1, a3, .LBB16_7
; RV32ZBB-NEXT: # %bb.6:
; RV32ZBB-NEXT: sltu a6, a3, a1
; RV32ZBB-NEXT: beqz a6, .LBB16_8
; RV32ZBB-NEXT: j .LBB16_9
; RV32ZBB-NEXT: .LBB16_7:
; RV32ZBB-NEXT: sltu a6, a2, a0
; RV32ZBB-NEXT: bnez a6, .LBB16_9
; RV32ZBB-NEXT: .LBB16_8:
; RV32ZBB-NEXT: mv a1, a3
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: .LBB16_9:
; RV32ZBB-NEXT: sltu a2, a5, a0
; RV32ZBB-NEXT: sub a1, a4, a1
; RV32ZBB-NEXT: sub a1, a1, a2
; RV32ZBB-NEXT: sub a0, a5, a0
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_minmax_i64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: minu a2, a0, a1
; RV64ZBB-NEXT: maxu a0, a0, a1
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: ret
  %min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
  %max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
  %sub = sub i64 %min, %max
  ret i64 %sub
}
|
|
|
|
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; Negated absolute difference via sub(umin(a,b), umax(a,b)) for i128.
; On RV32 the i128 is passed indirectly (result pointer in a0, operand
; pointers in a1/a2), hence the lw/sw traffic and the s0 spill; on RV64 the
; value lives in two GPR pairs.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; RV32I-LABEL: abd_minmax_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a6, 4(a2)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a5, 12(a1)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: beq a5, t0, .LBB17_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu t1, a5, t0
; RV32I-NEXT: j .LBB17_3
; RV32I-NEXT: .LBB17_2:
; RV32I-NEXT: sltu t1, a4, a7
; RV32I-NEXT: .LBB17_3:
; RV32I-NEXT: lw t2, 0(a2)
; RV32I-NEXT: lw a1, 0(a1)
; RV32I-NEXT: beq a3, a6, .LBB17_5
; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: sltu t6, a3, a6
; RV32I-NEXT: j .LBB17_6
; RV32I-NEXT: .LBB17_5:
; RV32I-NEXT: sltu t6, a1, t2
; RV32I-NEXT: .LBB17_6:
; RV32I-NEXT: xor a2, a5, t0
; RV32I-NEXT: xor t3, a4, a7
; RV32I-NEXT: or t5, t3, a2
; RV32I-NEXT: beqz t5, .LBB17_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t6, t1
; RV32I-NEXT: .LBB17_8:
; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: mv t1, a3
; RV32I-NEXT: mv t4, a5
; RV32I-NEXT: mv t3, a4
; RV32I-NEXT: bnez t6, .LBB17_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv a2, t2
; RV32I-NEXT: mv t1, a6
; RV32I-NEXT: mv t4, t0
; RV32I-NEXT: mv t3, a7
; RV32I-NEXT: .LBB17_10:
; RV32I-NEXT: beq a5, t0, .LBB17_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sltu t6, t0, a5
; RV32I-NEXT: j .LBB17_13
; RV32I-NEXT: .LBB17_12:
; RV32I-NEXT: sltu t6, a7, a4
; RV32I-NEXT: .LBB17_13:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: beq a3, a6, .LBB17_15
; RV32I-NEXT: # %bb.14:
; RV32I-NEXT: sltu s0, a6, a3
; RV32I-NEXT: bnez t5, .LBB17_16
; RV32I-NEXT: j .LBB17_17
; RV32I-NEXT: .LBB17_15:
; RV32I-NEXT: sltu s0, t2, a1
; RV32I-NEXT: beqz t5, .LBB17_17
; RV32I-NEXT: .LBB17_16:
; RV32I-NEXT: mv s0, t6
; RV32I-NEXT: .LBB17_17:
; RV32I-NEXT: bnez s0, .LBB17_19
; RV32I-NEXT: # %bb.18:
; RV32I-NEXT: mv a1, t2
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: mv a5, t0
; RV32I-NEXT: mv a4, a7
; RV32I-NEXT: .LBB17_19:
; RV32I-NEXT: sltu a7, t3, a4
; RV32I-NEXT: sub a5, t4, a5
; RV32I-NEXT: sltu a6, a2, a1
; RV32I-NEXT: sub a5, a5, a7
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: beq t1, a3, .LBB17_21
; RV32I-NEXT: # %bb.20:
; RV32I-NEXT: sltu a7, t1, a3
; RV32I-NEXT: .LBB17_21:
; RV32I-NEXT: sub a4, t3, a4
; RV32I-NEXT: sub a3, t1, a3
; RV32I-NEXT: sub a2, a2, a1
; RV32I-NEXT: sltu a1, a4, a7
; RV32I-NEXT: sub a4, a4, a7
; RV32I-NEXT: sub a3, a3, a6
; RV32I-NEXT: sub a5, a5, a1
; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i128:
; RV64I: # %bb.0:
; RV64I-NEXT: beq a1, a3, .LBB17_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sltu a6, a1, a3
; RV64I-NEXT: j .LBB17_3
; RV64I-NEXT: .LBB17_2:
; RV64I-NEXT: sltu a6, a0, a2
; RV64I-NEXT: .LBB17_3:
; RV64I-NEXT: mv a4, a1
; RV64I-NEXT: mv a5, a0
; RV64I-NEXT: bnez a6, .LBB17_5
; RV64I-NEXT: # %bb.4:
; RV64I-NEXT: mv a4, a3
; RV64I-NEXT: mv a5, a2
; RV64I-NEXT: .LBB17_5:
; RV64I-NEXT: beq a1, a3, .LBB17_7
; RV64I-NEXT: # %bb.6:
; RV64I-NEXT: sltu a6, a3, a1
; RV64I-NEXT: beqz a6, .LBB17_8
; RV64I-NEXT: j .LBB17_9
; RV64I-NEXT: .LBB17_7:
; RV64I-NEXT: sltu a6, a2, a0
; RV64I-NEXT: bnez a6, .LBB17_9
; RV64I-NEXT: .LBB17_8:
; RV64I-NEXT: mv a1, a3
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: .LBB17_9:
; RV64I-NEXT: sltu a2, a5, a0
; RV64I-NEXT: sub a1, a4, a1
; RV64I-NEXT: sub a1, a1, a2
; RV64I-NEXT: sub a0, a5, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_minmax_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a6, 4(a2)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a3, 4(a1)
; RV32ZBB-NEXT: lw a5, 12(a1)
; RV32ZBB-NEXT: lw a4, 8(a1)
; RV32ZBB-NEXT: beq a5, t0, .LBB17_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu t1, a5, t0
; RV32ZBB-NEXT: j .LBB17_3
; RV32ZBB-NEXT: .LBB17_2:
; RV32ZBB-NEXT: sltu t1, a4, a7
; RV32ZBB-NEXT: .LBB17_3:
; RV32ZBB-NEXT: lw t2, 0(a2)
; RV32ZBB-NEXT: lw a1, 0(a1)
; RV32ZBB-NEXT: beq a3, a6, .LBB17_5
; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: sltu t6, a3, a6
; RV32ZBB-NEXT: j .LBB17_6
; RV32ZBB-NEXT: .LBB17_5:
; RV32ZBB-NEXT: sltu t6, a1, t2
; RV32ZBB-NEXT: .LBB17_6:
; RV32ZBB-NEXT: xor a2, a5, t0
; RV32ZBB-NEXT: xor t3, a4, a7
; RV32ZBB-NEXT: or t5, t3, a2
; RV32ZBB-NEXT: beqz t5, .LBB17_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t6, t1
; RV32ZBB-NEXT: .LBB17_8:
; RV32ZBB-NEXT: mv a2, a1
; RV32ZBB-NEXT: mv t1, a3
; RV32ZBB-NEXT: mv t4, a5
; RV32ZBB-NEXT: mv t3, a4
; RV32ZBB-NEXT: bnez t6, .LBB17_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: mv a2, t2
; RV32ZBB-NEXT: mv t1, a6
; RV32ZBB-NEXT: mv t4, t0
; RV32ZBB-NEXT: mv t3, a7
; RV32ZBB-NEXT: .LBB17_10:
; RV32ZBB-NEXT: beq a5, t0, .LBB17_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sltu t6, t0, a5
; RV32ZBB-NEXT: j .LBB17_13
; RV32ZBB-NEXT: .LBB17_12:
; RV32ZBB-NEXT: sltu t6, a7, a4
; RV32ZBB-NEXT: .LBB17_13:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: beq a3, a6, .LBB17_15
; RV32ZBB-NEXT: # %bb.14:
; RV32ZBB-NEXT: sltu s0, a6, a3
; RV32ZBB-NEXT: bnez t5, .LBB17_16
; RV32ZBB-NEXT: j .LBB17_17
; RV32ZBB-NEXT: .LBB17_15:
; RV32ZBB-NEXT: sltu s0, t2, a1
; RV32ZBB-NEXT: beqz t5, .LBB17_17
; RV32ZBB-NEXT: .LBB17_16:
; RV32ZBB-NEXT: mv s0, t6
; RV32ZBB-NEXT: .LBB17_17:
; RV32ZBB-NEXT: bnez s0, .LBB17_19
; RV32ZBB-NEXT: # %bb.18:
; RV32ZBB-NEXT: mv a1, t2
; RV32ZBB-NEXT: mv a3, a6
; RV32ZBB-NEXT: mv a5, t0
; RV32ZBB-NEXT: mv a4, a7
; RV32ZBB-NEXT: .LBB17_19:
; RV32ZBB-NEXT: sltu a7, t3, a4
; RV32ZBB-NEXT: sub a5, t4, a5
; RV32ZBB-NEXT: sltu a6, a2, a1
; RV32ZBB-NEXT: sub a5, a5, a7
; RV32ZBB-NEXT: mv a7, a6
; RV32ZBB-NEXT: beq t1, a3, .LBB17_21
; RV32ZBB-NEXT: # %bb.20:
; RV32ZBB-NEXT: sltu a7, t1, a3
; RV32ZBB-NEXT: .LBB17_21:
; RV32ZBB-NEXT: sub a4, t3, a4
; RV32ZBB-NEXT: sub a3, t1, a3
; RV32ZBB-NEXT: sub a2, a2, a1
; RV32ZBB-NEXT: sltu a1, a4, a7
; RV32ZBB-NEXT: sub a4, a4, a7
; RV32ZBB-NEXT: sub a3, a3, a6
; RV32ZBB-NEXT: sub a5, a5, a1
; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_minmax_i128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: beq a1, a3, .LBB17_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: sltu a6, a1, a3
; RV64ZBB-NEXT: j .LBB17_3
; RV64ZBB-NEXT: .LBB17_2:
; RV64ZBB-NEXT: sltu a6, a0, a2
; RV64ZBB-NEXT: .LBB17_3:
; RV64ZBB-NEXT: mv a4, a1
; RV64ZBB-NEXT: mv a5, a0
; RV64ZBB-NEXT: bnez a6, .LBB17_5
; RV64ZBB-NEXT: # %bb.4:
; RV64ZBB-NEXT: mv a4, a3
; RV64ZBB-NEXT: mv a5, a2
; RV64ZBB-NEXT: .LBB17_5:
; RV64ZBB-NEXT: beq a1, a3, .LBB17_7
; RV64ZBB-NEXT: # %bb.6:
; RV64ZBB-NEXT: sltu a6, a3, a1
; RV64ZBB-NEXT: beqz a6, .LBB17_8
; RV64ZBB-NEXT: j .LBB17_9
; RV64ZBB-NEXT: .LBB17_7:
; RV64ZBB-NEXT: sltu a6, a2, a0
; RV64ZBB-NEXT: bnez a6, .LBB17_9
; RV64ZBB-NEXT: .LBB17_8:
; RV64ZBB-NEXT: mv a1, a3
; RV64ZBB-NEXT: mv a0, a2
; RV64ZBB-NEXT: .LBB17_9:
; RV64ZBB-NEXT: sltu a2, a5, a0
; RV64ZBB-NEXT: sub a1, a4, a1
; RV64ZBB-NEXT: sub a1, a1, a2
; RV64ZBB-NEXT: sub a0, a5, a0
; RV64ZBB-NEXT: ret
  %min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
  %max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
  %sub = sub i128 %min, %max
  ret i128 %sub
}
|
|
|
|
;
|
|
; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b)
|
|
;
|
|
|
|
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; Negated absolute difference via select(icmp ule, a-b, b-a) for i8.
; NOZBB and ZBB produce the same compare-and-branch sequence here (zext.b
; exists in both configurations).
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; NOZBB-LABEL: abd_cmp_i8:
; NOZBB: # %bb.0:
; NOZBB-NEXT: zext.b a2, a0
; NOZBB-NEXT: zext.b a3, a1
; NOZBB-NEXT: bgeu a3, a2, .LBB18_2
; NOZBB-NEXT: # %bb.1:
; NOZBB-NEXT: sub a0, a1, a0
; NOZBB-NEXT: ret
; NOZBB-NEXT: .LBB18_2:
; NOZBB-NEXT: sub a0, a0, a1
; NOZBB-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i8:
; ZBB: # %bb.0:
; ZBB-NEXT: zext.b a2, a0
; ZBB-NEXT: zext.b a3, a1
; ZBB-NEXT: bgeu a3, a2, .LBB18_2
; ZBB-NEXT: # %bb.1:
; ZBB-NEXT: sub a0, a1, a0
; ZBB-NEXT: ret
; ZBB-NEXT: .LBB18_2:
; ZBB-NEXT: sub a0, a0, a1
; ZBB-NEXT: ret
  %cmp = icmp ule i8 %a, %b
  %ab = sub i8 %a, %b
  %ba = sub i8 %b, %a
  %sel = select i1 %cmp, i8 %ab, i8 %ba
  ret i8 %sel
}
|
|
|
|
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; Negated absolute difference via select(icmp ult, a-b, b-a) for i16.
; Without Zbb the i16 zero-extension uses a lui/addi/and mask; with Zbb
; it uses zext.h.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; NOZBB-LABEL: abd_cmp_i16:
; NOZBB: # %bb.0:
; NOZBB-NEXT: lui a2, 16
; NOZBB-NEXT: addi a2, a2, -1
; NOZBB-NEXT: and a3, a1, a2
; NOZBB-NEXT: and a2, a0, a2
; NOZBB-NEXT: bltu a2, a3, .LBB19_2
; NOZBB-NEXT: # %bb.1:
; NOZBB-NEXT: sub a0, a1, a0
; NOZBB-NEXT: ret
; NOZBB-NEXT: .LBB19_2:
; NOZBB-NEXT: sub a0, a0, a1
; NOZBB-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i16:
; ZBB: # %bb.0:
; ZBB-NEXT: zext.h a2, a1
; ZBB-NEXT: zext.h a3, a0
; ZBB-NEXT: bltu a3, a2, .LBB19_2
; ZBB-NEXT: # %bb.1:
; ZBB-NEXT: sub a0, a1, a0
; ZBB-NEXT: ret
; ZBB-NEXT: .LBB19_2:
; ZBB-NEXT: sub a0, a0, a1
; ZBB-NEXT: ret
  %cmp = icmp ult i16 %a, %b
  %ab = sub i16 %a, %b
  %ba = sub i16 %b, %a
  %sel = select i1 %cmp, i16 %ab, i16 %ba
  ret i16 %sel
}
|
|
|
|
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; Negated absolute difference for i32; here the IR uses the inverted
; pattern select(icmp uge, b-a, a-b), which lowers to the same branchy
; compare/sub shape. RV64 variants sign-extend for the compare and use subw.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; RV32I-LABEL: abd_cmp_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: bgeu a0, a1, .LBB20_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB20_2:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a2, a1
; RV64I-NEXT: sext.w a3, a0
; RV64I-NEXT: bgeu a3, a2, .LBB20_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB20_2:
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bgeu a0, a1, .LBB20_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sub a0, a0, a1
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB20_2:
; RV32ZBB-NEXT: sub a0, a1, a0
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a2, a1
; RV64ZBB-NEXT: sext.w a3, a0
; RV64ZBB-NEXT: bgeu a3, a2, .LBB20_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: subw a0, a0, a1
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB20_2:
; RV64ZBB-NEXT: subw a0, a1, a0
; RV64ZBB-NEXT: ret
  %cmp = icmp uge i32 %a, %b
  %ab = sub i32 %a, %b
  %ba = sub i32 %b, %a
  %sel = select i1 %cmp, i32 %ba, i32 %ab
  ret i32 %sel
}
|
|
|
|
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; Negated absolute difference via select(icmp ult, a-b, b-a) for i64.
; RV32 needs a two-word unsigned compare (high words first, low words on
; equality) and a borrow-propagating double-word subtract in each arm.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; RV32I-LABEL: abd_cmp_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB21_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a5, a1, a3
; RV32I-NEXT: .LBB21_2:
; RV32I-NEXT: bnez a5, .LBB21_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB21_4:
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: bltu a0, a1, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB21_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu a5, a1, a3
; RV32ZBB-NEXT: .LBB21_2:
; RV32ZBB-NEXT: bnez a5, .LBB21_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB21_4:
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: bltu a0, a1, .LBB21_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: sub a0, a1, a0
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB21_2:
; RV64ZBB-NEXT: sub a0, a0, a1
; RV64ZBB-NEXT: ret
  %cmp = icmp ult i64 %a, %b
  %ab = sub i64 %a, %b
  %ba = sub i64 %b, %a
  %sel = select i1 %cmp, i64 %ab, i64 %ba
  ret i64 %sel
}
|
|
|
|
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; Negated absolute difference via select(icmp ult, a-b, b-a) for i128.
; On RV32 the i128 is passed indirectly (result pointer in a0, operand
; pointers in a1/a2); the four-word compare checks the two high words and
; falls back to the low words on equality, then each arm does a
; borrow-propagating four-word subtract.
; NOTE(review): CHECK lines appear machine-generated -- regenerate, don't hand-edit.
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
; RV32I-NEXT: lw a4, 4(a2)
; RV32I-NEXT: lw a5, 8(a2)
; RV32I-NEXT: lw a7, 12(a2)
; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t1, 12(a1)
; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: sltu t0, a6, a5
; RV32I-NEXT: mv t4, t0
; RV32I-NEXT: beq t1, a7, .LBB22_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu t4, t1, a7
; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: sltu t2, a2, a3
; RV32I-NEXT: mv t3, t2
; RV32I-NEXT: beq a1, a4, .LBB22_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sltu t3, a1, a4
; RV32I-NEXT: .LBB22_4:
; RV32I-NEXT: xor t5, t1, a7
; RV32I-NEXT: xor t6, a6, a5
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: mv t6, t3
; RV32I-NEXT: beqz t5, .LBB22_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t6, t4
; RV32I-NEXT: .LBB22_6:
; RV32I-NEXT: sltu t4, a3, a2
; RV32I-NEXT: mv t5, t4
; RV32I-NEXT: beq a1, a4, .LBB22_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: sltu t5, a4, a1
; RV32I-NEXT: .LBB22_8:
; RV32I-NEXT: bnez t6, .LBB22_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t0, a5, a6
; RV32I-NEXT: sub a7, a7, t1
; RV32I-NEXT: sub a5, a5, a6
; RV32I-NEXT: sub a4, a4, a1
; RV32I-NEXT: sub a6, a7, t0
; RV32I-NEXT: sltu a7, a5, t5
; RV32I-NEXT: sub a1, a5, t5
; RV32I-NEXT: sub a5, a4, t4
; RV32I-NEXT: sub a4, a6, a7
; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: j .LBB22_11
; RV32I-NEXT: .LBB22_10:
; RV32I-NEXT: sub a7, t1, a7
; RV32I-NEXT: sub a5, a6, a5
; RV32I-NEXT: sub a4, a1, a4
; RV32I-NEXT: sub a6, a7, t0
; RV32I-NEXT: sltu a7, a5, t3
; RV32I-NEXT: sub a1, a5, t3
; RV32I-NEXT: sub a5, a4, t2
; RV32I-NEXT: sub a4, a6, a7
; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
; RV32I-NEXT: sw a1, 8(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i128:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB22_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sltu a5, a1, a3
; RV64I-NEXT: .LBB22_2:
; RV64I-NEXT: bnez a5, .LBB22_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB22_4:
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
; RV32ZBB-NEXT: lw a4, 4(a2)
; RV32ZBB-NEXT: lw a5, 8(a2)
; RV32ZBB-NEXT: lw a7, 12(a2)
; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t1, 12(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
; RV32ZBB-NEXT: sltu t0, a6, a5
; RV32ZBB-NEXT: mv t4, t0
; RV32ZBB-NEXT: beq t1, a7, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu t4, t1, a7
; RV32ZBB-NEXT: .LBB22_2:
; RV32ZBB-NEXT: sltu t2, a2, a3
; RV32ZBB-NEXT: mv t3, t2
; RV32ZBB-NEXT: beq a1, a4, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sltu t3, a1, a4
; RV32ZBB-NEXT: .LBB22_4:
; RV32ZBB-NEXT: xor t5, t1, a7
; RV32ZBB-NEXT: xor t6, a6, a5
; RV32ZBB-NEXT: or t5, t6, t5
; RV32ZBB-NEXT: mv t6, t3
; RV32ZBB-NEXT: beqz t5, .LBB22_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t6, t4
; RV32ZBB-NEXT: .LBB22_6:
; RV32ZBB-NEXT: sltu t4, a3, a2
; RV32ZBB-NEXT: mv t5, t4
; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: sltu t5, a4, a1
; RV32ZBB-NEXT: .LBB22_8:
; RV32ZBB-NEXT: bnez t6, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t0, a5, a6
; RV32ZBB-NEXT: sub a7, a7, t1
; RV32ZBB-NEXT: sub a5, a5, a6
; RV32ZBB-NEXT: sub a4, a4, a1
; RV32ZBB-NEXT: sub a6, a7, t0
; RV32ZBB-NEXT: sltu a7, a5, t5
; RV32ZBB-NEXT: sub a1, a5, t5
; RV32ZBB-NEXT: sub a5, a4, t4
; RV32ZBB-NEXT: sub a4, a6, a7
; RV32ZBB-NEXT: sub a2, a3, a2
; RV32ZBB-NEXT: j .LBB22_11
; RV32ZBB-NEXT: .LBB22_10:
; RV32ZBB-NEXT: sub a7, t1, a7
; RV32ZBB-NEXT: sub a5, a6, a5
; RV32ZBB-NEXT: sub a4, a1, a4
; RV32ZBB-NEXT: sub a6, a7, t0
; RV32ZBB-NEXT: sltu a7, a5, t3
; RV32ZBB-NEXT: sub a1, a5, t3
; RV32ZBB-NEXT: sub a5, a4, t2
; RV32ZBB-NEXT: sub a4, a6, a7
; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw a5, 4(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB22_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: sltu a5, a1, a3
; RV64ZBB-NEXT: .LBB22_2:
; RV64ZBB-NEXT: bnez a5, .LBB22_4
; RV64ZBB-NEXT: # %bb.3:
; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB22_4:
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %cmp = icmp ult i128 %a, %b
  %ab = sub i128 %a, %b
  %ba = sub i128 %b, %a
  %sel = select i1 %cmp, i128 %ab, i128 %ba
  ret i128 %sel
}
|
|
|
|
declare i8 @llvm.abs.i8(i8, i1)
|
|
declare i16 @llvm.abs.i16(i16, i1)
|
|
declare i32 @llvm.abs.i32(i32, i1)
|
|
declare i64 @llvm.abs.i64(i64, i1)
|
|
declare i128 @llvm.abs.i128(i128, i1)
|
|
|
|
declare i8 @llvm.umax.i8(i8, i8)
|
|
declare i16 @llvm.umax.i16(i16, i16)
|
|
declare i32 @llvm.umax.i32(i32, i32)
|
|
declare i64 @llvm.umax.i64(i64, i64)
|
|
|
|
declare i8 @llvm.umin.i8(i8, i8)
|
|
declare i16 @llvm.umin.i16(i16, i16)
|
|
declare i32 @llvm.umin.i32(i32, i32)
|
|
declare i64 @llvm.umin.i64(i64, i64)
|