Stefan Pintilie 53c37f300d
[PowerPC] Add phony subregisters to cover the high half of the VSX registers. (#94628)
On PowerPC there are 128-bit VSX registers. These registers are half
overlapped with the 64-bit floating point registers (FPR). The 64-bit half
of the VSX register that does not overlap with the FPR does not overlap
with any other register class. The FPRs are the only subregisters of the
VSX registers, but they do not fully cover the 128-bit super register.
This leads to incorrect lane masks being created.

This patch adds phony registers for the other half of the VSX registers
in order to fully cover them and to make sure that the lane masks are
not the same for the VSX and the floating point register.
2024-07-29 11:17:04 -04:00

201 lines
6.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
; Scalar float case: llvm.ldexp.f32.i32 is applied to the constant 1.0 with an
; exponent taken from the zero-extended i8 argument. The expected pwr9 output
; below materializes the constant in registers and calls the ldexpf routine.
define float @ldexp_f32(i8 zeroext %x) {
; CHECK-LABEL: ldexp_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: vspltisw v2, 1
; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: xvcvsxwdp vs1, v2
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; Widen the i8 argument to the i32 exponent type the intrinsic takes.
%zext = zext i8 %x to i32
%ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext)
ret float %ldexp
}
; Scalar double case: same shape as the float test, but using
; llvm.ldexp.f64.i32 on the constant 1.0. The expected output below calls the
; ldexp routine instead of ldexpf.
define double @ldexp_f64(i8 zeroext %x) {
; CHECK-LABEL: ldexp_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: vspltisw v2, 1
; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: xvcvsxwdp vs1, v2
; CHECK-NEXT: bl ldexp
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; Widen the i8 argument to the i32 exponent type the intrinsic takes.
%zext = zext i8 %x to i32
%ldexp = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 %zext)
ret double %ldexp
}
; Two-element vector case: llvm.ldexp.v2f32.v2i32 on vector arguments. The
; expected output below contains two separate calls to ldexpf, and spills and
; reloads v29, v30 and v31 around them (16-byte stxv/lxv to the stack frame).
define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; CHECK-LABEL: ldexp_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -80(r1)
; CHECK-NEXT: std r0, 96(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset v29, -48
; CHECK-NEXT: .cfi_offset v30, -32
; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: vextuwrx r4, r3, v3
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: vmr v30, v2
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd vs0, v30
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: xscvdpspn v29, f1
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: vextuwrx r4, r3, v31
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xscvdpspn vs0, f1
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT: xxmrghw v2, vs0, v29
; CHECK-NEXT: lxv v29, 32(r1) # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp)
ret <2 x float> %1
}
; Four-element vector case: llvm.ldexp.v4f32.v4i32 on vector arguments. The
; expected output below contains four separate calls to ldexpf, and spills and
; reloads v28 through v31 around them (16-byte stxv/lxv to the stack frame).
define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
; CHECK-LABEL: ldexp_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -96(r1)
; CHECK-NEXT: std r0, 112(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset v28, -64
; CHECK-NEXT: .cfi_offset v29, -48
; CHECK-NEXT: .cfi_offset v30, -32
; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: li r3, 12
; CHECK-NEXT: xscvspdpn f1, v2
; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: vmr v30, v2
; CHECK-NEXT: vextuwrx r4, r3, v3
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd vs0, v30
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: xscpsgndp v29, f1, f1
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: vextuwrx r4, r3, v31
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xxmrghd vs0, v29, vs1
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: vextuwrx r4, r3, v31
; CHECK-NEXT: xvcvdpsp v28, vs0
; CHECK-NEXT: xxsldwi vs0, v30, v30, 3
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xxsldwi vs0, v30, v30, 1
; CHECK-NEXT: xscpsgndp v29, f1, f1
; CHECK-NEXT: mfvsrwz r4, v31
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xxmrghd vs0, vs1, v29
; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v29, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT: xvcvdpsp v2, vs0
; CHECK-NEXT: vmrgew v2, v28, v2
; CHECK-NEXT: lxv v28, 32(r1) # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 96
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%1 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %val, <4 x i32> %exp)
ret <4 x float> %1
}
; Half-precision case: llvm.ldexp.f16.i32 on a half argument and an i32
; exponent. The expected output below converts the incoming value with
; xscvdphp, masks it to 16 bits, converts it back with xscvhpdp, and then
; calls ldexpf; the exponent register r4 is cleared in its upper 32 bits.
define half @ldexp_f16(half %arg0, i32 %arg1) {
; CHECK-LABEL: ldexp_f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: clrldi r4, r4, 32
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%ldexp = call half @llvm.ldexp.f16.i32(half %arg0, i32 %arg1)
ret half %ldexp
}
; ppc_fp128 case: llvm.ldexp.ppcf128.i32 on a ppc_fp128 argument and an i32
; exponent. The expected output below clears the upper 32 bits of r5 and
; calls the ldexpl routine.
define ppc_fp128 @ldexp_fp128(ppc_fp128 %arg0, i32 %arg1) {
; CHECK-LABEL: ldexp_fp128:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: clrldi r5, r5, 32
; CHECK-NEXT: bl ldexpl
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%ldexp = call ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128 %arg0, i32 %arg1)
ret ppc_fp128 %ldexp
}
; Declarations of the llvm.ldexp intrinsic overloads called by the test
; functions in this file. Every declaration refers to attribute group #0,
; which is defined on the last line.
declare double @llvm.ldexp.f64.i32(double, i32) #0
declare float @llvm.ldexp.f32.i32(float, i32) #0
declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0
declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0
declare half @llvm.ldexp.f16.i32(half, i32) #0
declare ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128, i32) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }