llvm-project/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
RolandF77 26ba186bd0
[PowerPC] Improve pwr7 codegen for v4i8 load (#104507)
There are no partial vector loads on pwr7 so current v4i8 codegen is an
int load then store to vector sized temp and re-load as vector. Try to
use lfiwax to load 32 bits into an FP reg and take advantage of VSX FP
and vector reg sharing to move the result to the right vector position.
2024-09-04 12:55:27 -04:00

278 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-LE-P7
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-LE-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE-P7
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-BE-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9
; xscvdpsxws and xscvdpuxws are only available on Power7 and above.
; Codegen is different for Power7, Power8, and Power9.
; test: convert the double %b to a signed i32 (fptosi) and insert the result
; into element 3 of the <4 x i32> %a, returning the updated vector.
; Expected code per the assertions below:
;   - pwr9: xscvdpsxws followed by a single xxinsertw (immediate 0 on LE,
;     12 on BE).
;   - pwr8: xscvdpsxws writes a vector register directly, then vperm merges
;     it with %a using a control vector loaded from .LCPI0_0 via the TOC.
;   - pwr7: the converted word is stored to r1-4 (stfiwx), reloaded (lfiwzx)
;     and splatted (xxspltw) before the same vperm merge.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
; CHECK-LE-P7-LABEL: test:
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-P8-NEXT: xscvdpsxws v4, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P7-LABEL: test:
; CHECK-BE-P7: # %bb.0: # %entry
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-P8-NEXT: xscvdpsxws v4, f1
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
%conv = fptosi double %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
ret <4 x i32> %vecins
}
; test2: convert the float %b to a signed i32 (fptosi) and insert the result
; into element 3 of the <4 x i32> %a, returning the updated vector.
; Expected code per the assertions below:
;   - pwr9: xscvdpsxws followed by a single xxinsertw (immediate 0 on LE,
;     12 on BE).
;   - pwr8: xscvdpsxws writes a vector register directly, then vperm merges
;     it with %a using a control vector loaded from .LCPI1_0 via the TOC.
;   - pwr7: the converted word is stored to r1-4 (stfiwx), reloaded (lfiwzx)
;     and splatted (xxspltw) before the same vperm merge.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
; CHECK-LE-P7-LABEL: test2:
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI1_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test2:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-P8-NEXT: xscvdpsxws v4, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test2:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P7-LABEL: test2:
; CHECK-BE-P7: # %bb.0: # %entry
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test2:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-P8-NEXT: xscvdpsxws v4, f1
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test2:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
%conv = fptosi float %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
ret <4 x i32> %vecins
}
; test3: convert the double %b to an unsigned i32 (fptoui) and insert the
; result into element 3 of the <4 x i32> %a, returning the updated vector.
; Expected code per the assertions below:
;   - pwr9: xscvdpuxws followed by a single xxinsertw (immediate 0 on LE,
;     12 on BE).
;   - pwr8: xscvdpuxws writes a vector register directly, then vperm merges
;     it with %a using a control vector loaded from .LCPI2_0 via the TOC.
;   - pwr7: the converted word is stored to r1-4 (stfiwx), reloaded (lfiwzx)
;     and splatted (xxspltw) before the same vperm merge.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
; CHECK-LE-P7-LABEL: test3:
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test3:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT: xscvdpuxws v4, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test3:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P7-LABEL: test3:
; CHECK-BE-P7: # %bb.0: # %entry
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test3:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P8-NEXT: xscvdpuxws v4, f1
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test3:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
%conv = fptoui double %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
ret <4 x i32> %vecins
}
; test4: convert the float %b to an unsigned i32 (fptoui) and insert the
; result into element 3 of the <4 x i32> %a, returning the updated vector.
; Expected code per the assertions below:
;   - pwr9: xscvdpuxws followed by a single xxinsertw (immediate 0 on LE,
;     12 on BE).
;   - pwr8: xscvdpuxws writes a vector register directly, then vperm merges
;     it with %a using a control vector loaded from .LCPI3_0 via the TOC.
;   - pwr7: the converted word is stored to r1-4 (stfiwx), reloaded (lfiwzx)
;     and splatted (xxspltw) before the same vperm merge.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
; CHECK-LE-P7-LABEL: test4:
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI3_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test4:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P8-NEXT: xscvdpuxws v4, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P9-LABEL: test4:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P7-LABEL: test4:
; CHECK-BE-P7: # %bb.0: # %entry
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test4:
; CHECK-BE-P8: # %bb.0: # %entry
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P8-NEXT: xscvdpuxws v4, f1
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT: blr
;
; CHECK-BE-P9-LABEL: test4:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT: blr
entry:
%conv = fptoui float %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
ret <4 x i32> %vecins
}