
; The previous expansion of [US]CMP used two selects and two compares. It
; produced decent code, but on many platforms it is better to implement
; [US]CMP nodes by performing the following operation:
;
;   [us]cmp(x, y) = (x [us]> y) - (x [us]< y)
;
; This patch adds this new expansion, as well as a hook in TargetLowering that
; allows some targets to keep using the select-based approach. AArch64 and
; SystemZ are currently the only targets that prefer the select-based approach,
; but other targets may also start to use it if it provides better codegen.
; Source listing: 125 lines, 3.3 KiB, LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=ppc64le-unknown-unknown %s -o - | FileCheck %s

; ucmp_8_8: unsigned three-way compare of two i8 values, producing an i8.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y. Both arguments are marked zeroext, and the expected output
; contains no extra zero-extension of the inputs.
; Expected ppc64le shape: the 0/1 "greater" bit is taken from the sign bit of
; (y - x) (sub followed by rldicl ..., 1, 63), and isellt on the cmplw result
; replaces it with the -1 constant (li, then rldic clearing the upper 32 bits)
; when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: # %bb.0:
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: sub 5, 4, 3
; CHECK-NEXT: li 3, -1
; CHECK-NEXT: rldicl 5, 5, 1, 63
; CHECK-NEXT: rldic 3, 3, 0, 32
; CHECK-NEXT: isellt 3, 3, 5
; CHECK-NEXT: blr
  %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
  ret i8 %1
}
; ucmp_8_16: unsigned three-way compare of two i16 values, producing an i8.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y. Both arguments are marked zeroext, and the expected output
; contains no extra zero-extension of the inputs.
; Expected ppc64le shape: the 0/1 "greater" bit is taken from the sign bit of
; (y - x) (sub followed by rldicl ..., 1, 63), and isellt on the cmplw result
; replaces it with the -1 constant (li, then rldic clearing the upper 32 bits)
; when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: # %bb.0:
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: sub 5, 4, 3
; CHECK-NEXT: li 3, -1
; CHECK-NEXT: rldicl 5, 5, 1, 63
; CHECK-NEXT: rldic 3, 3, 0, 32
; CHECK-NEXT: isellt 3, 3, 5
; CHECK-NEXT: blr
  %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
  ret i8 %1
}
; ucmp_8_32: unsigned three-way compare of two i32 values, producing an i8.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y. The arguments carry no extension attribute here, and the
; expected output first clears the upper 32 bits of both inputs (clrldi ..., 32)
; before subtracting them.
; Expected ppc64le shape: the 0/1 "greater" bit is taken from the sign bit of
; the 64-bit difference (y - x) (rldicl ..., 1, 63), and isellt on the cmplw
; result replaces it with the -1 constant (li, then rldic clearing the upper
; 32 bits) when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: # %bb.0:
; CHECK-NEXT: clrldi 5, 4, 32
; CHECK-NEXT: clrldi 6, 3, 32
; CHECK-NEXT: sub 5, 5, 6
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: li 3, -1
; CHECK-NEXT: rldic 3, 3, 0, 32
; CHECK-NEXT: rldicl 5, 5, 1, 63
; CHECK-NEXT: isellt 3, 3, 5
; CHECK-NEXT: blr
  %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
  ret i8 %1
}
; ucmp_8_64: unsigned three-way compare of two i64 values, producing an i8.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y.
; Expected ppc64le shape: a full 64-bit difference cannot expose the borrow in
; its sign bit, so the 0/1 "greater" bit is recovered from the carry instead
; (subc, subfe of a register with itself, then neg). isellt on the cmpld result
; then substitutes the -1 loaded by li when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_8_64:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: subc 3, 4, 3
; CHECK-NEXT: subfe 3, 4, 4
; CHECK-NEXT: li 4, -1
; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: isellt 3, 4, 3
; CHECK-NEXT: blr
  %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
  ret i8 %1
}
; ucmp_8_128: unsigned three-way compare of two i128 values, producing an i8.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y.
; Expected ppc64le shape: each i128 arrives as two 64-bit registers, so two
; cmpld instructions compare the halves into separate condition-register
; fields (presumably r4/r6 hold the high halves and r3/r5 the low halves --
; confirm against the ELFv2 ABI). Condition-register logic (crand, crandc,
; crnor, cror) combines those fields, and two isel instructions pick between
; the constants 1, 0 and -1.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: ucmp_8_128:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpld 4, 6
; CHECK-NEXT: cmpld 1, 3, 5
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: li 4, -1
; CHECK-NEXT: crandc 20, 1, 2
; CHECK-NEXT: crand 21, 2, 5
; CHECK-NEXT: crnor 20, 21, 20
; CHECK-NEXT: crand 21, 2, 4
; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: crandc 20, 0, 2
; CHECK-NEXT: cror 20, 21, 20
; CHECK-NEXT: isel 3, 4, 3, 20
; CHECK-NEXT: blr
  %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
  ret i8 %1
}
; ucmp_32_32: unsigned three-way compare of two i32 values, producing an i32.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y. The arguments carry no extension attribute here, and the
; expected output first clears the upper 32 bits of both inputs (clrldi ..., 32)
; before subtracting them.
; Expected ppc64le shape: the 0/1 "greater" bit is taken from the sign bit of
; the 64-bit difference (y - x) (rldicl ..., 1, 63), and isellt on the cmplw
; result replaces it with the -1 constant (li, then rldic clearing the upper
; 32 bits) when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: # %bb.0:
; CHECK-NEXT: clrldi 5, 4, 32
; CHECK-NEXT: clrldi 6, 3, 32
; CHECK-NEXT: sub 5, 5, 6
; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: li 3, -1
; CHECK-NEXT: rldic 3, 3, 0, 32
; CHECK-NEXT: rldicl 5, 5, 1, 63
; CHECK-NEXT: isellt 3, 3, 5
; CHECK-NEXT: blr
  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
  ret i32 %1
}
; ucmp_32_64: unsigned three-way compare of two i64 values, producing an i32.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y.
; Expected ppc64le shape: the 0/1 "greater" bit is recovered from the carry of
; the 64-bit subtraction (subc, subfe of a register with itself, then neg), and
; isellt on the cmpld result substitutes the -1 loaded by li when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_32_64:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: subc 3, 4, 3
; CHECK-NEXT: subfe 3, 4, 4
; CHECK-NEXT: li 4, -1
; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: isellt 3, 4, 3
; CHECK-NEXT: blr
  %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
  ret i32 %1
}
; ucmp_64_64: unsigned three-way compare of two i64 values, producing an i64.
; Per the LLVM LangRef, llvm.ucmp yields -1 if %x <u %y, 0 if they are equal,
; and 1 if %x >u %y.
; Expected ppc64le shape: the 0/1 "greater" bit is recovered from the carry of
; the 64-bit subtraction (subc, subfe of a register with itself, then neg), and
; isellt on the cmpld result substitutes the -1 loaded by li when x is below y.
; The FileCheck assertions below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_64_64:
; CHECK: # %bb.0:
; CHECK-NEXT: subc 5, 4, 3
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: li 3, -1
; CHECK-NEXT: subfe 5, 4, 4
; CHECK-NEXT: neg 5, 5
; CHECK-NEXT: isellt 3, 3, 5
; CHECK-NEXT: blr
  %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
  ret i64 %1
}