
mflr is kind of expensive on Power version smaller than 10, so we should schedule the store for the mflr's def away from mflr. In epilogue, the expensive mtlr has no user for its def, so it doesn't matter that the load and the mtlr are back-to-back. Reviewed By: RolandF Differential Revision: https://reviews.llvm.org/D137423
230 lines
7.5 KiB
LLVM
230 lines
7.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
|
|
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
|
|
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
|
|
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \
|
|
; RUN: -check-prefix=CHECK-P8
|
|
|
|
define void @qp_trunc(ptr nocapture readonly %a, ptr nocapture %res) {
|
|
; CHECK-LABEL: qp_trunc:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: lxv v2, 0(r3)
|
|
; CHECK-NEXT: xsrqpi 1, v2, v2, 1
|
|
; CHECK-NEXT: stxv v2, 0(r4)
|
|
; CHECK-NEXT: blr
|
|
;
|
|
; CHECK-P8-LABEL: qp_trunc:
|
|
; CHECK-P8: # %bb.0: # %entry
|
|
; CHECK-P8-NEXT: mflr r0
|
|
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-P8-NEXT: .cfi_offset lr, 16
|
|
; CHECK-P8-NEXT: .cfi_offset r30, -16
|
|
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-P8-NEXT: stdu r1, -48(r1)
|
|
; CHECK-P8-NEXT: std r0, 64(r1)
|
|
; CHECK-P8-NEXT: mr r30, r4
|
|
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-P8-NEXT: xxswapd v2, vs0
|
|
; CHECK-P8-NEXT: bl truncf128
|
|
; CHECK-P8-NEXT: nop
|
|
; CHECK-P8-NEXT: xxswapd vs0, v2
|
|
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
|
; CHECK-P8-NEXT: addi r1, r1, 48
|
|
; CHECK-P8-NEXT: ld r0, 16(r1)
|
|
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-P8-NEXT: mtlr r0
|
|
; CHECK-P8-NEXT: blr
|
|
entry:
|
|
%0 = load fp128, ptr %a, align 16
|
|
%1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
|
|
store fp128 %1, ptr %res, align 16
|
|
ret void
|
|
}
|
|
declare fp128 @llvm.trunc.f128(fp128 %Val)
|
|
|
|
define void @qp_rint(ptr nocapture readonly %a, ptr nocapture %res) {
|
|
; CHECK-LABEL: qp_rint:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: lxv v2, 0(r3)
|
|
; CHECK-NEXT: xsrqpix 0, v2, v2, 3
|
|
; CHECK-NEXT: stxv v2, 0(r4)
|
|
; CHECK-NEXT: blr
|
|
;
|
|
; CHECK-P8-LABEL: qp_rint:
|
|
; CHECK-P8: # %bb.0: # %entry
|
|
; CHECK-P8-NEXT: mflr r0
|
|
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-P8-NEXT: .cfi_offset lr, 16
|
|
; CHECK-P8-NEXT: .cfi_offset r30, -16
|
|
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-P8-NEXT: stdu r1, -48(r1)
|
|
; CHECK-P8-NEXT: std r0, 64(r1)
|
|
; CHECK-P8-NEXT: mr r30, r4
|
|
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-P8-NEXT: xxswapd v2, vs0
|
|
; CHECK-P8-NEXT: bl rintf128
|
|
; CHECK-P8-NEXT: nop
|
|
; CHECK-P8-NEXT: xxswapd vs0, v2
|
|
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
|
; CHECK-P8-NEXT: addi r1, r1, 48
|
|
; CHECK-P8-NEXT: ld r0, 16(r1)
|
|
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-P8-NEXT: mtlr r0
|
|
; CHECK-P8-NEXT: blr
|
|
entry:
|
|
%0 = load fp128, ptr %a, align 16
|
|
%1 = tail call fp128 @llvm.rint.f128(fp128 %0)
|
|
store fp128 %1, ptr %res, align 16
|
|
ret void
|
|
}
|
|
declare fp128 @llvm.rint.f128(fp128 %Val)
|
|
|
|
define void @qp_nearbyint(ptr nocapture readonly %a, ptr nocapture %res) {
|
|
; CHECK-LABEL: qp_nearbyint:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: lxv v2, 0(r3)
|
|
; CHECK-NEXT: xsrqpi 0, v2, v2, 3
|
|
; CHECK-NEXT: stxv v2, 0(r4)
|
|
; CHECK-NEXT: blr
|
|
;
|
|
; CHECK-P8-LABEL: qp_nearbyint:
|
|
; CHECK-P8: # %bb.0: # %entry
|
|
; CHECK-P8-NEXT: mflr r0
|
|
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-P8-NEXT: .cfi_offset lr, 16
|
|
; CHECK-P8-NEXT: .cfi_offset r30, -16
|
|
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-P8-NEXT: stdu r1, -48(r1)
|
|
; CHECK-P8-NEXT: std r0, 64(r1)
|
|
; CHECK-P8-NEXT: mr r30, r4
|
|
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-P8-NEXT: xxswapd v2, vs0
|
|
; CHECK-P8-NEXT: bl nearbyintf128
|
|
; CHECK-P8-NEXT: nop
|
|
; CHECK-P8-NEXT: xxswapd vs0, v2
|
|
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
|
; CHECK-P8-NEXT: addi r1, r1, 48
|
|
; CHECK-P8-NEXT: ld r0, 16(r1)
|
|
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-P8-NEXT: mtlr r0
|
|
; CHECK-P8-NEXT: blr
|
|
entry:
|
|
%0 = load fp128, ptr %a, align 16
|
|
%1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0)
|
|
store fp128 %1, ptr %res, align 16
|
|
ret void
|
|
}
|
|
declare fp128 @llvm.nearbyint.f128(fp128 %Val)
|
|
|
|
define void @qp_round(ptr nocapture readonly %a, ptr nocapture %res) {
|
|
; CHECK-LABEL: qp_round:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: lxv v2, 0(r3)
|
|
; CHECK-NEXT: xsrqpi 0, v2, v2, 0
|
|
; CHECK-NEXT: stxv v2, 0(r4)
|
|
; CHECK-NEXT: blr
|
|
;
|
|
; CHECK-P8-LABEL: qp_round:
|
|
; CHECK-P8: # %bb.0: # %entry
|
|
; CHECK-P8-NEXT: mflr r0
|
|
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-P8-NEXT: .cfi_offset lr, 16
|
|
; CHECK-P8-NEXT: .cfi_offset r30, -16
|
|
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-P8-NEXT: stdu r1, -48(r1)
|
|
; CHECK-P8-NEXT: std r0, 64(r1)
|
|
; CHECK-P8-NEXT: mr r30, r4
|
|
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-P8-NEXT: xxswapd v2, vs0
|
|
; CHECK-P8-NEXT: bl roundf128
|
|
; CHECK-P8-NEXT: nop
|
|
; CHECK-P8-NEXT: xxswapd vs0, v2
|
|
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
|
; CHECK-P8-NEXT: addi r1, r1, 48
|
|
; CHECK-P8-NEXT: ld r0, 16(r1)
|
|
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-P8-NEXT: mtlr r0
|
|
; CHECK-P8-NEXT: blr
|
|
entry:
|
|
%0 = load fp128, ptr %a, align 16
|
|
%1 = tail call fp128 @llvm.round.f128(fp128 %0)
|
|
store fp128 %1, ptr %res, align 16
|
|
ret void
|
|
}
|
|
declare fp128 @llvm.round.f128(fp128 %Val)
|
|
|
|
define void @qp_floor(ptr nocapture readonly %a, ptr nocapture %res) {
|
|
; CHECK-LABEL: qp_floor:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: lxv v2, 0(r3)
|
|
; CHECK-NEXT: xsrqpi 1, v2, v2, 3
|
|
; CHECK-NEXT: stxv v2, 0(r4)
|
|
; CHECK-NEXT: blr
|
|
;
|
|
; CHECK-P8-LABEL: qp_floor:
|
|
; CHECK-P8: # %bb.0: # %entry
|
|
; CHECK-P8-NEXT: mflr r0
|
|
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-P8-NEXT: .cfi_offset lr, 16
|
|
; CHECK-P8-NEXT: .cfi_offset r30, -16
|
|
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-P8-NEXT: stdu r1, -48(r1)
|
|
; CHECK-P8-NEXT: std r0, 64(r1)
|
|
; CHECK-P8-NEXT: mr r30, r4
|
|
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-P8-NEXT: xxswapd v2, vs0
|
|
; CHECK-P8-NEXT: bl floorf128
|
|
; CHECK-P8-NEXT: nop
|
|
; CHECK-P8-NEXT: xxswapd vs0, v2
|
|
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
|
; CHECK-P8-NEXT: addi r1, r1, 48
|
|
; CHECK-P8-NEXT: ld r0, 16(r1)
|
|
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-P8-NEXT: mtlr r0
|
|
; CHECK-P8-NEXT: blr
|
|
entry:
|
|
%0 = load fp128, ptr %a, align 16
|
|
%1 = tail call fp128 @llvm.floor.f128(fp128 %0)
|
|
store fp128 %1, ptr %res, align 16
|
|
ret void
|
|
}
|
|
declare fp128 @llvm.floor.f128(fp128 %Val)
|
|
|
|
define void @qp_ceil(ptr nocapture readonly %a, ptr nocapture %res) {
|
|
; CHECK-LABEL: qp_ceil:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: lxv v2, 0(r3)
|
|
; CHECK-NEXT: xsrqpi 1, v2, v2, 2
|
|
; CHECK-NEXT: stxv v2, 0(r4)
|
|
; CHECK-NEXT: blr
|
|
;
|
|
; CHECK-P8-LABEL: qp_ceil:
|
|
; CHECK-P8: # %bb.0: # %entry
|
|
; CHECK-P8-NEXT: mflr r0
|
|
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-P8-NEXT: .cfi_offset lr, 16
|
|
; CHECK-P8-NEXT: .cfi_offset r30, -16
|
|
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-P8-NEXT: stdu r1, -48(r1)
|
|
; CHECK-P8-NEXT: std r0, 64(r1)
|
|
; CHECK-P8-NEXT: mr r30, r4
|
|
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-P8-NEXT: xxswapd v2, vs0
|
|
; CHECK-P8-NEXT: bl ceilf128
|
|
; CHECK-P8-NEXT: nop
|
|
; CHECK-P8-NEXT: xxswapd vs0, v2
|
|
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
|
|
; CHECK-P8-NEXT: addi r1, r1, 48
|
|
; CHECK-P8-NEXT: ld r0, 16(r1)
|
|
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-P8-NEXT: mtlr r0
|
|
; CHECK-P8-NEXT: blr
|
|
entry:
|
|
%0 = load fp128, ptr %a, align 16
|
|
%1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
|
|
store fp128 %1, ptr %res, align 16
|
|
ret void
|
|
}
|
|
declare fp128 @llvm.ceil.f128(fp128 %Val)
|
|
|