
We have two forceExpandWideMUL functions. One takes the low and high half of 2 inputs and calculates the low and high half of their product. This does not calculate the full 2x width product. The other signature takes 2 inputs and calculates the low and high half of their full 2x width product. Previously it did this by sign/zero extending the inputs to create the high bits and then calling the other function. We can instead copy the algorithm from the other function and use the Signed flag to determine whether we should do SRA or SRL. This avoids the need to multiply the high part of the inputs and add them to the high half of the result. This improves the generated code for signed multiplication. This should improve the performance of #123262. I don't know yet how close we will get to gcc.
506 lines
16 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Intrinsic declarations used by the tests below.
declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128)
declare i128 @llvm.uadd.sat.i128(i128, i128)

declare { i128, i1 } @llvm.usub.with.overflow.i128(i128, i128)
declare i128 @llvm.usub.sat.i128(i128, i128)

declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128)
declare i128 @llvm.umul.sat.i128(i128, i128)

declare { i128, i1 } @llvm.sadd.with.overflow.i128(i128, i128)
declare i128 @llvm.sadd.sat.i128(i128, i128)

declare { i128, i1 } @llvm.ssub.with.overflow.i128(i128, i128)
declare i128 @llvm.ssub.sat.i128(i128, i128)

declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128)
declare i128 @llvm.smul.sat.i128(i128, i128)

; Plain 128-bit unsigned add: lowers to adds/adc on the two 64-bit halves.
define i128 @u128_add(i128 %x, i128 %y) {
; CHECK-LABEL: u128_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x0, x0, x2
; CHECK-NEXT:    adc x1, x1, x3
; CHECK-NEXT:    ret
  %1 = add i128 %x, %y
  ret i128 %1
}

; Checked unsigned add: returns the sum plus an i8 that is 1 when NO overflow
; occurred (the overflow bit is inverted before being returned).
define { i128, i8 } @u128_checked_add(i128 %x, i128 %y) {
; CHECK-LABEL: u128_checked_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x0, x0, x2
; CHECK-NEXT:    adcs x1, x1, x3
; CHECK-NEXT:    cset w8, hs
; CHECK-NEXT:    eor w2, w8, #0x1
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i1 %3, true
  %5 = zext i1 %4 to i8
  %6 = insertvalue { i128, i8 } undef, i128 %2, 0
  %7 = insertvalue { i128, i8 } %6, i8 %5, 1
  ret { i128, i8 } %7
}

; Overflowing unsigned add: returns the sum plus the raw overflow bit as i8.
define { i128, i8 } @u128_overflowing_add(i128 %x, i128 %y) {
; CHECK-LABEL: u128_overflowing_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x0, x0, x2
; CHECK-NEXT:    adcs x1, x1, x3
; CHECK-NEXT:    cset w2, hs
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = zext i1 %3 to i8
  %5 = insertvalue { i128, i8 } undef, i128 %2, 0
  %6 = insertvalue { i128, i8 } %5, i8 %4, 1
  ret { i128, i8 } %6
}

; Saturating unsigned add: clamps to all-ones (UINT128_MAX) on carry-out.
define i128 @u128_saturating_add(i128 %x, i128 %y) {
; CHECK-LABEL: u128_saturating_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x8, x0, x2
; CHECK-NEXT:    adcs x9, x1, x3
; CHECK-NEXT:    csinv x0, x8, xzr, lo
; CHECK-NEXT:    csinv x1, x9, xzr, lo
; CHECK-NEXT:    ret
  %1 = tail call i128 @llvm.uadd.sat.i128(i128 %x, i128 %y)
  ret i128 %1
}

; Plain 128-bit unsigned subtract: lowers to subs/sbc.
define i128 @u128_sub(i128 %x, i128 %y) {
; CHECK-LABEL: u128_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x0, x0, x2
; CHECK-NEXT:    sbc x1, x1, x3
; CHECK-NEXT:    ret
  %1 = sub i128 %x, %y
  ret i128 %1
}

; Checked unsigned subtract: i8 result is 1 when NO borrow occurred.
define { i128, i8 } @u128_checked_sub(i128 %x, i128 %y) {
; CHECK-LABEL: u128_checked_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x0, x0, x2
; CHECK-NEXT:    sbcs x1, x1, x3
; CHECK-NEXT:    cset w8, lo
; CHECK-NEXT:    eor w2, w8, #0x1
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.usub.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i1 %3, true
  %5 = zext i1 %4 to i8
  %6 = insertvalue { i128, i8 } undef, i128 %2, 0
  %7 = insertvalue { i128, i8 } %6, i8 %5, 1
  ret { i128, i8 } %7
}

; Overflowing unsigned subtract: returns the difference plus the borrow bit.
define { i128, i8 } @u128_overflowing_sub(i128 %x, i128 %y) {
; CHECK-LABEL: u128_overflowing_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x0, x0, x2
; CHECK-NEXT:    sbcs x1, x1, x3
; CHECK-NEXT:    cset w2, lo
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.usub.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = zext i1 %3 to i8
  %5 = insertvalue { i128, i8 } undef, i128 %2, 0
  %6 = insertvalue { i128, i8 } %5, i8 %4, 1
  ret { i128, i8 } %6
}

; Saturating unsigned subtract: clamps to zero on borrow.
define i128 @u128_saturating_sub(i128 %x, i128 %y) {
; CHECK-LABEL: u128_saturating_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x8, x0, x2
; CHECK-NEXT:    sbcs x9, x1, x3
; CHECK-NEXT:    csel x0, xzr, x8, lo
; CHECK-NEXT:    csel x1, xzr, x9, lo
; CHECK-NEXT:    ret
  %1 = tail call i128 @llvm.usub.sat.i128(i128 %x, i128 %y)
  ret i128 %1
}

; Plain 128-bit signed add: identical lowering to the unsigned case.
define i128 @i128_add(i128 %x, i128 %y) {
; CHECK-LABEL: i128_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x0, x0, x2
; CHECK-NEXT:    adc x1, x1, x3
; CHECK-NEXT:    ret
  %1 = add i128 %x, %y
  ret i128 %1
}

; Checked signed add: overflow is detected via the V flag; i8 is 1 on success.
define { i128, i8 } @i128_checked_add(i128 %x, i128 %y) {
; CHECK-LABEL: i128_checked_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x0, x0, x2
; CHECK-NEXT:    adcs x1, x1, x3
; CHECK-NEXT:    cset w8, vs
; CHECK-NEXT:    eor w2, w8, #0x1
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i1 %3, true
  %5 = zext i1 %4 to i8
  %6 = insertvalue { i128, i8 } undef, i128 %2, 0
  %7 = insertvalue { i128, i8 } %6, i8 %5, 1
  ret { i128, i8 } %7
}

; Overflowing signed add: returns the sum plus the raw signed-overflow bit.
define { i128, i8 } @i128_overflowing_add(i128 %x, i128 %y) {
; CHECK-LABEL: i128_overflowing_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x0, x0, x2
; CHECK-NEXT:    adcs x1, x1, x3
; CHECK-NEXT:    cset w2, vs
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = zext i1 %3 to i8
  %5 = insertvalue { i128, i8 } undef, i128 %2, 0
  %6 = insertvalue { i128, i8 } %5, i8 %4, 1
  ret { i128, i8 } %6
}

; Saturating signed add: on overflow, selects INT128_MIN/MAX built from the
; sign of the (wrapped) high half.
define i128 @i128_saturating_add(i128 %x, i128 %y) {
; CHECK-LABEL: i128_saturating_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x8, x0, x2
; CHECK-NEXT:    adcs x9, x1, x3
; CHECK-NEXT:    asr x10, x9, #63
; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
; CHECK-NEXT:    csel x0, x10, x8, vs
; CHECK-NEXT:    csel x1, x11, x9, vs
; CHECK-NEXT:    ret
  %1 = tail call i128 @llvm.sadd.sat.i128(i128 %x, i128 %y)
  ret i128 %1
}

; Plain 128-bit signed subtract: identical lowering to the unsigned case.
define i128 @i128_sub(i128 %x, i128 %y) {
; CHECK-LABEL: i128_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x0, x0, x2
; CHECK-NEXT:    sbc x1, x1, x3
; CHECK-NEXT:    ret
  %1 = sub i128 %x, %y
  ret i128 %1
}

; Checked signed subtract: i8 result is 1 when no signed overflow occurred.
define { i128, i8 } @i128_checked_sub(i128 %x, i128 %y) {
; CHECK-LABEL: i128_checked_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x0, x0, x2
; CHECK-NEXT:    sbcs x1, x1, x3
; CHECK-NEXT:    cset w8, vs
; CHECK-NEXT:    eor w2, w8, #0x1
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.ssub.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i1 %3, true
  %5 = zext i1 %4 to i8
  %6 = insertvalue { i128, i8 } undef, i128 %2, 0
  %7 = insertvalue { i128, i8 } %6, i8 %5, 1
  ret { i128, i8 } %7
}

; Overflowing signed subtract: returns the difference plus the overflow bit.
define { i128, i8 } @i128_overflowing_sub(i128 %x, i128 %y) {
; CHECK-LABEL: i128_overflowing_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x0, x0, x2
; CHECK-NEXT:    sbcs x1, x1, x3
; CHECK-NEXT:    cset w2, vs
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.ssub.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = zext i1 %3 to i8
  %5 = insertvalue { i128, i8 } undef, i128 %2, 0
  %6 = insertvalue { i128, i8 } %5, i8 %4, 1
  ret { i128, i8 } %6
}

; Saturating signed subtract: clamps to INT128_MIN/MAX on signed overflow.
define i128 @i128_saturating_sub(i128 %x, i128 %y) {
; CHECK-LABEL: i128_saturating_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subs x8, x0, x2
; CHECK-NEXT:    sbcs x9, x1, x3
; CHECK-NEXT:    asr x10, x9, #63
; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
; CHECK-NEXT:    csel x0, x10, x8, vs
; CHECK-NEXT:    csel x1, x11, x9, vs
; CHECK-NEXT:    ret
  %1 = tail call i128 @llvm.ssub.sat.i128(i128 %x, i128 %y)
  ret i128 %1
}

; Plain 128-bit multiply (low 128 bits only): umulh + two madd.
define i128 @u128_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh x8, x0, x2
; CHECK-NEXT:    madd x8, x0, x3, x8
; CHECK-NEXT:    mul x0, x0, x2
; CHECK-NEXT:    madd x1, x1, x2, x8
; CHECK-NEXT:    ret
  %1 = mul i128 %x, %y
  ret i128 %1
}

; Checked unsigned multiply: i8 result is 1 when the full 128x128 product
; fits in 128 bits (overflow bit inverted before returning).
define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_checked_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x3, x0
; CHECK-NEXT:    cmp x1, #0
; CHECK-NEXT:    ccmp x3, #0, #4, ne
; CHECK-NEXT:    umulh x8, x1, x2
; CHECK-NEXT:    umulh x10, x3, x0
; CHECK-NEXT:    madd x9, x1, x2, x9
; CHECK-NEXT:    ccmp xzr, x8, #0, eq
; CHECK-NEXT:    umulh x11, x0, x2
; CHECK-NEXT:    ccmp xzr, x10, #0, eq
; CHECK-NEXT:    mul x0, x0, x2
; CHECK-NEXT:    cset w8, ne
; CHECK-NEXT:    adds x1, x11, x9
; CHECK-NEXT:    csinc w8, w8, wzr, lo
; CHECK-NEXT:    eor w2, w8, #0x1
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i1 %3, true
  %5 = zext i1 %4 to i8
  %6 = insertvalue { i128, i8 } undef, i128 %2, 0
  %7 = insertvalue { i128, i8 } %6, i8 %5, 1
  ret { i128, i8 } %7
}

; Overflowing unsigned multiply: returns the low product plus the overflow bit.
define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_overflowing_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x3, x0
; CHECK-NEXT:    cmp x1, #0
; CHECK-NEXT:    ccmp x3, #0, #4, ne
; CHECK-NEXT:    umulh x8, x1, x2
; CHECK-NEXT:    umulh x10, x3, x0
; CHECK-NEXT:    madd x9, x1, x2, x9
; CHECK-NEXT:    ccmp xzr, x8, #0, eq
; CHECK-NEXT:    umulh x11, x0, x2
; CHECK-NEXT:    ccmp xzr, x10, #0, eq
; CHECK-NEXT:    mul x0, x0, x2
; CHECK-NEXT:    cset w8, ne
; CHECK-NEXT:    adds x1, x11, x9
; CHECK-NEXT:    csinc w2, w8, wzr, lo
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = zext i1 %3 to i8
  %5 = insertvalue { i128, i8 } undef, i128 %2, 0
  %6 = insertvalue { i128, i8 } %5, i8 %4, 1
  ret { i128, i8 } %6
}

; Saturating unsigned multiply: clamps to UINT128_MAX on overflow.
define i128 @u128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_saturating_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x3, x0
; CHECK-NEXT:    cmp x1, #0
; CHECK-NEXT:    ccmp x3, #0, #4, ne
; CHECK-NEXT:    umulh x8, x1, x2
; CHECK-NEXT:    umulh x10, x3, x0
; CHECK-NEXT:    madd x9, x1, x2, x9
; CHECK-NEXT:    ccmp xzr, x8, #0, eq
; CHECK-NEXT:    umulh x11, x0, x2
; CHECK-NEXT:    ccmp xzr, x10, #0, eq
; CHECK-NEXT:    mul x8, x0, x2
; CHECK-NEXT:    cset w10, ne
; CHECK-NEXT:    adds x9, x11, x9
; CHECK-NEXT:    csinc w10, w10, wzr, lo
; CHECK-NEXT:    cmp w10, #0
; CHECK-NEXT:    csinv x0, x8, xzr, eq
; CHECK-NEXT:    csinv x1, x9, xzr, eq
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = select i1 %3, i128 -1, i128 %2
  ret i128 %4
}

; Plain 128-bit signed multiply: the low 128 bits are sign-agnostic, so the
; lowering matches u128_mul exactly.
define i128 @i128_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh x8, x0, x2
; CHECK-NEXT:    madd x8, x0, x3, x8
; CHECK-NEXT:    mul x0, x0, x2
; CHECK-NEXT:    madd x1, x1, x2, x8
; CHECK-NEXT:    ret
  %1 = mul i128 %x, %y
  ret i128 %1
}

; Checked signed multiply: computes the full 256-bit signed product and checks
; that the high 128 bits equal the sign-extension of the low 128 bits.
; i8 result is 1 when no overflow occurred.
define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_checked_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr x9, x1, #63
; CHECK-NEXT:    umulh x10, x0, x2
; CHECK-NEXT:    asr x13, x3, #63
; CHECK-NEXT:    mul x11, x1, x2
; CHECK-NEXT:    umulh x8, x1, x2
; CHECK-NEXT:    mul x9, x9, x2
; CHECK-NEXT:    adds x10, x11, x10
; CHECK-NEXT:    mul x14, x0, x3
; CHECK-NEXT:    umulh x12, x0, x3
; CHECK-NEXT:    adc x9, x8, x9
; CHECK-NEXT:    mul x13, x0, x13
; CHECK-NEXT:    adds x8, x14, x10
; CHECK-NEXT:    mul x15, x1, x3
; CHECK-NEXT:    smulh x10, x1, x3
; CHECK-NEXT:    mov x1, x8
; CHECK-NEXT:    adc x11, x12, x13
; CHECK-NEXT:    asr x12, x9, #63
; CHECK-NEXT:    asr x13, x11, #63
; CHECK-NEXT:    adds x9, x9, x11
; CHECK-NEXT:    asr x11, x8, #63
; CHECK-NEXT:    mul x0, x0, x2
; CHECK-NEXT:    adc x12, x12, x13
; CHECK-NEXT:    adds x9, x15, x9
; CHECK-NEXT:    adc x10, x10, x12
; CHECK-NEXT:    cmp x9, x11
; CHECK-NEXT:    ccmp x10, x11, #0, eq
; CHECK-NEXT:    cset w2, eq
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i1 %3, true
  %5 = zext i1 %4 to i8
  %6 = insertvalue { i128, i8 } undef, i128 %2, 0
  %7 = insertvalue { i128, i8 } %6, i8 %5, 1
  ret { i128, i8 } %7
}

; Overflowing signed multiply: same lowering as i128_checked_mul but returns
; the raw overflow bit (cset ne instead of eq, no inversion).
define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_overflowing_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr x9, x1, #63
; CHECK-NEXT:    umulh x10, x0, x2
; CHECK-NEXT:    asr x13, x3, #63
; CHECK-NEXT:    mul x11, x1, x2
; CHECK-NEXT:    umulh x8, x1, x2
; CHECK-NEXT:    mul x9, x9, x2
; CHECK-NEXT:    adds x10, x11, x10
; CHECK-NEXT:    mul x14, x0, x3
; CHECK-NEXT:    umulh x12, x0, x3
; CHECK-NEXT:    adc x9, x8, x9
; CHECK-NEXT:    mul x13, x0, x13
; CHECK-NEXT:    adds x8, x14, x10
; CHECK-NEXT:    mul x15, x1, x3
; CHECK-NEXT:    smulh x10, x1, x3
; CHECK-NEXT:    mov x1, x8
; CHECK-NEXT:    adc x11, x12, x13
; CHECK-NEXT:    asr x12, x9, #63
; CHECK-NEXT:    asr x13, x11, #63
; CHECK-NEXT:    adds x9, x9, x11
; CHECK-NEXT:    asr x11, x8, #63
; CHECK-NEXT:    mul x0, x0, x2
; CHECK-NEXT:    adc x12, x12, x13
; CHECK-NEXT:    adds x9, x15, x9
; CHECK-NEXT:    adc x10, x10, x12
; CHECK-NEXT:    cmp x9, x11
; CHECK-NEXT:    ccmp x10, x11, #0, eq
; CHECK-NEXT:    cset w2, ne
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = zext i1 %3 to i8
  %5 = insertvalue { i128, i8 } undef, i128 %2, 0
  %6 = insertvalue { i128, i8 } %5, i8 %4, 1
  ret { i128, i8 } %6
}

; Saturating signed multiply: on overflow, selects INT128_MAX or INT128_MIN
; depending on the XOR of the operands' sign bits.
define i128 @i128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_saturating_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr x9, x1, #63
; CHECK-NEXT:    umulh x10, x0, x2
; CHECK-NEXT:    asr x13, x3, #63
; CHECK-NEXT:    mul x11, x1, x2
; CHECK-NEXT:    umulh x8, x1, x2
; CHECK-NEXT:    mul x9, x9, x2
; CHECK-NEXT:    adds x10, x11, x10
; CHECK-NEXT:    mul x14, x0, x3
; CHECK-NEXT:    umulh x12, x0, x3
; CHECK-NEXT:    adc x8, x8, x9
; CHECK-NEXT:    mul x13, x0, x13
; CHECK-NEXT:    adds x9, x14, x10
; CHECK-NEXT:    mul x11, x1, x3
; CHECK-NEXT:    adc x10, x12, x13
; CHECK-NEXT:    smulh x12, x1, x3
; CHECK-NEXT:    asr x13, x8, #63
; CHECK-NEXT:    asr x14, x10, #63
; CHECK-NEXT:    adds x8, x8, x10
; CHECK-NEXT:    adc x10, x13, x14
; CHECK-NEXT:    adds x8, x11, x8
; CHECK-NEXT:    asr x11, x9, #63
; CHECK-NEXT:    mul x13, x0, x2
; CHECK-NEXT:    adc x10, x12, x10
; CHECK-NEXT:    eor x12, x3, x1
; CHECK-NEXT:    eor x8, x8, x11
; CHECK-NEXT:    eor x10, x10, x11
; CHECK-NEXT:    asr x11, x12, #63
; CHECK-NEXT:    orr x8, x8, x10
; CHECK-NEXT:    eor x10, x11, #0x7fffffffffffffff
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csinv x0, x13, x11, eq
; CHECK-NEXT:    csel x1, x10, x9, ne
; CHECK-NEXT:    ret
  %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
  %2 = extractvalue { i128, i1 } %1, 0
  %3 = extractvalue { i128, i1 } %1, 1
  %4 = xor i128 %y, %x
  %5 = icmp sgt i128 %4, -1
  %6 = select i1 %5, i128 170141183460469231731687303715884105727, i128 -170141183460469231731687303715884105728
  %7 = select i1 %3, i128 %6, i128 %2
  ret i128 %7
}

; sadd.with.overflow(~x, 1) folds to a 128-bit negate (negs/ngcs).
define { i128, i1 } @saddo_not_1(i128 %x) nounwind {
; CHECK-LABEL: saddo_not_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    negs x0, x0
; CHECK-NEXT:    ngcs x1, x1
; CHECK-NEXT:    cset w2, vs
; CHECK-NEXT:    ret
  %not = xor i128 %x, -1
  %r = call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %not, i128 1)
  ret { i128, i1 } %r
}

; Like saddo_not_1, but the addend (2^64 + 1) carries into the high half, so
; the high word becomes sbcs against the constant 1.
define { i128, i1 } @saddo_carry_not_1(i128 %x) nounwind {
; CHECK-LABEL: saddo_carry_not_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    negs x0, x0
; CHECK-NEXT:    sbcs x1, x8, x1
; CHECK-NEXT:    cset w2, vs
; CHECK-NEXT:    ret
  %not = xor i128 %x, -1
  %r = call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %not, i128 u0x10000000000000001)
  ret { i128, i1 } %r
}