David Green d10f23a25d [ISel] Expand saddsat and ssubsat via asr and xor
This changes the lowering of saddsat and ssubsat so that instead of
using:
  r,o = saddo x, y
  c = setcc r < 0
  s = c ? INTMAX : INTMIN
  ret o ? s : r
it uses asr and xor to materialize the INTMAX/INTMIN constants:
  r,o = saddo x, y
  s = ashr r, BW-1
  m = xor s, INTMIN
  ret o ? m : r
https://alive2.llvm.org/ce/z/TYufgD

This seems to reduce the instruction count in most testcases across most
architectures. X86 has some custom lowering added to compensate for
cases where it can increase instruction count.

Differential Revision: https://reviews.llvm.org/D105853
2021-08-19 16:08:07 +01:00

115 lines
3.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2NODSP
; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2DSP
; RUN: llc < %s -mtriple=armv5te-none-none-eabi | FileCheck %s --check-prefix=CHECK-ARM
; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM
; qdadd: computes sadd.sat(sadd.sat(x, x), y), i.e. a saturating doubling of
; %x followed by a saturating add of %y.
; Per the assertions below, the thumbv7em, armv5te and armv8a configurations
; are expected to emit a single qdadd. The thumbv7m configuration is expected
; to expand each saturating step into a flag-setting add plus a conditional
; eor (executed on signed overflow) that combines INT_MIN with the sign of the
; wrapped result.
define i32 @qdadd(i32 %x, i32 %y) nounwind {
; CHECK-T2NODSP-LABEL: qdadd:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: adds r0, r0, r1
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: qdadd:
; CHECK-T2DSP: @ %bb.0:
; CHECK-T2DSP-NEXT: qdadd r0, r1, r0
; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: qdadd:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: qdadd r0, r1, r0
; CHECK-ARM-NEXT: bx lr
  ; Saturating doubling of the first argument.
  %doubled = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
  ; Doubled value is the left-hand operand of the second saturating add.
  %sum = call i32 @llvm.sadd.sat.i32(i32 %doubled, i32 %y)
  ret i32 %sum
}
; qdadd_c: commuted form of qdadd. Computes sadd.sat(y, sadd.sat(x, x)), so the
; doubled value is the right-hand operand of the second saturating add.
; Per the assertions below, the expected code is the same as for qdadd: a
; single qdadd on the thumbv7em, armv5te and armv8a configurations, and the
; add/conditional-eor expansion on thumbv7m.
define i32 @qdadd_c(i32 %x, i32 %y) nounwind {
; CHECK-T2NODSP-LABEL: qdadd_c:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: adds r0, r0, r1
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: qdadd_c:
; CHECK-T2DSP: @ %bb.0:
; CHECK-T2DSP-NEXT: qdadd r0, r1, r0
; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: qdadd_c:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: qdadd r0, r1, r0
; CHECK-ARM-NEXT: bx lr
  ; Saturating doubling of the first argument.
  %doubled = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
  ; Operands swapped relative to qdadd: %y first, doubled value second.
  %sum = call i32 @llvm.sadd.sat.i32(i32 %y, i32 %doubled)
  ret i32 %sum
}
; qdsub: computes ssub.sat(y, sadd.sat(x, x)), i.e. the saturating doubling of
; %x is saturating-subtracted from %y.
; Per the assertions below, the thumbv7em, armv5te and armv8a configurations
; are expected to emit a single qdsub. The thumbv7m configuration is expected
; to expand into a flag-setting add and a flag-setting subtract, each followed
; by a conditional eor (executed on signed overflow) that combines INT_MIN
; with the sign of the wrapped result.
define i32 @qdsub(i32 %x, i32 %y) nounwind {
; CHECK-T2NODSP-LABEL: qdsub:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: subs r0, r1, r0
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: qdsub:
; CHECK-T2DSP: @ %bb.0:
; CHECK-T2DSP-NEXT: qdsub r0, r1, r0
; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: qdsub:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: qdsub r0, r1, r0
; CHECK-ARM-NEXT: bx lr
  ; Saturating doubling of the first argument.
  %doubled = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
  ; Doubled value is the subtrahend: %y - %doubled, saturating.
  %diff = call i32 @llvm.ssub.sat.i32(i32 %y, i32 %doubled)
  ret i32 %diff
}
; qdsub_c: operand-swapped variant of qdsub. Computes
; ssub.sat(sadd.sat(x, x), y), so the doubled value is the minuend rather than
; the subtrahend.
; Per the assertions below, this form is NOT expected to become a single
; qdsub: the thumbv7em, armv5te and armv8a configurations are expected to emit
; a qadd followed by a qsub, and thumbv7m the add/subtract plus
; conditional-eor expansion.
define i32 @qdsub_c(i32 %x, i32 %y) nounwind {
; CHECK-T2NODSP-LABEL: qdsub_c:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: qdsub_c:
; CHECK-T2DSP: @ %bb.0:
; CHECK-T2DSP-NEXT: qadd r0, r0, r0
; CHECK-T2DSP-NEXT: qsub r0, r0, r1
; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: qdsub_c:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: qadd r0, r0, r0
; CHECK-ARM-NEXT: qsub r0, r0, r1
; CHECK-ARM-NEXT: bx lr
  ; Saturating doubling of the first argument.
  %doubled = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
  ; Doubled value is the minuend: %doubled - %y, saturating.
  %diff = call i32 @llvm.ssub.sat.i32(i32 %doubled, i32 %y)
  ret i32 %diff
}
; Declarations of the 32-bit signed saturating add and subtract intrinsics
; called by the test functions in this file.
declare i32 @llvm.sadd.sat.i32(i32, i32)
declare i32 @llvm.ssub.sat.i32(i32, i32)