
There are no partial vector loads on pwr7, so the current v4i8 codegen is an int load, followed by a store to a vector-sized temporary and a reload as a vector. Try to use lfiwax to load 32 bits into an FP register and take advantage of VSX FP and vector register sharing to move the result to the right vector position.
278 lines
10 KiB
LLVM
278 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
|
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-LE-P7
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
|
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE-P8
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
|
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-LE-P9
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
|
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE-P7
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
|
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-BE-P8
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
|
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9
|
|
; xscvdpsxws and xscvdpuxws are only available on Power7 and above
|
|
; Codegen is different for Power7, Power8, and Power9.
|
|
|
|
; test: converts the double argument %b to a signed 32-bit integer (fptosi)
; and inserts the result at element index 3 of the <4 x i32> argument %a.
; The check lines below pin the expected code for each endianness/CPU pair:
; the Power7 sequences store the converted word to the stack (stfiwx), reload
; it (lfiwzx), splat it and merge with vperm; the Power8 sequences feed the
; conversion result straight into vperm; the Power9 sequences use a single
; xxinsertw (immediate 0 for little-endian, 12 for big-endian).
define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
|
|
; CHECK-LE-P7-LABEL: test:
|
|
; CHECK-LE-P7: # %bb.0: # %entry
|
|
; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-LE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha
|
|
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI0_0@toc@l
|
|
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
|
|
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
|
|
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P7-NEXT: blr
|
|
;
|
|
; CHECK-LE-P8-LABEL: test:
|
|
; CHECK-LE-P8: # %bb.0: # %entry
|
|
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
|
; CHECK-LE-P8-NEXT: xscvdpsxws v4, f1
|
|
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
|
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P8-NEXT: blr
|
|
;
|
|
; CHECK-LE-P9-LABEL: test:
|
|
; CHECK-LE-P9: # %bb.0: # %entry
|
|
; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
|
|
; CHECK-LE-P9-NEXT: blr
|
|
;
|
|
; CHECK-BE-P7-LABEL: test:
|
|
; CHECK-BE-P7: # %bb.0: # %entry
|
|
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
|
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
|
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
|
|
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
|
|
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
|
|
; CHECK-BE-P7-NEXT: blr
|
|
;
|
|
; CHECK-BE-P8-LABEL: test:
|
|
; CHECK-BE-P8: # %bb.0: # %entry
|
|
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
|
; CHECK-BE-P8-NEXT: xscvdpsxws v4, f1
|
|
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
|
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
|
|
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
|
|
; CHECK-BE-P8-NEXT: blr
|
|
;
|
|
; CHECK-BE-P9-LABEL: test:
|
|
; CHECK-BE-P9: # %bb.0: # %entry
|
|
; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
|
|
; CHECK-BE-P9-NEXT: blr
|
|
entry:
|
|
; Signed conversion of the double argument to i32.
%conv = fptosi double %b to i32
|
|
; Replace element 3 of %a with the converted value; other elements are kept.
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
|
|
ret <4 x i32> %vecins
|
|
}
|
|
|
|
; test2: same shape as the double/signed case, but the source is a float:
; converts %b to a signed 32-bit integer (fptosi) and inserts the result at
; element index 3 of the <4 x i32> argument %a.
; The check lines below expect the same xscvdpsxws conversion instruction as
; for a double source, followed by the per-CPU insertion sequence (stack
; round-trip plus vperm on Power7, vperm on Power8, xxinsertw on Power9).
define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
|
|
; CHECK-LE-P7-LABEL: test2:
|
|
; CHECK-LE-P7: # %bb.0: # %entry
|
|
; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-LE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI1_0@toc@ha
|
|
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI1_0@toc@l
|
|
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
|
|
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
|
|
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P7-NEXT: blr
|
|
;
|
|
; CHECK-LE-P8-LABEL: test2:
|
|
; CHECK-LE-P8: # %bb.0: # %entry
|
|
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
|
; CHECK-LE-P8-NEXT: xscvdpsxws v4, f1
|
|
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
|
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P8-NEXT: blr
|
|
;
|
|
; CHECK-LE-P9-LABEL: test2:
|
|
; CHECK-LE-P9: # %bb.0: # %entry
|
|
; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
|
|
; CHECK-LE-P9-NEXT: blr
|
|
;
|
|
; CHECK-BE-P7-LABEL: test2:
|
|
; CHECK-BE-P7: # %bb.0: # %entry
|
|
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
|
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
|
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
|
|
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
|
|
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
|
|
; CHECK-BE-P7-NEXT: blr
|
|
;
|
|
; CHECK-BE-P8-LABEL: test2:
|
|
; CHECK-BE-P8: # %bb.0: # %entry
|
|
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
|
; CHECK-BE-P8-NEXT: xscvdpsxws v4, f1
|
|
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
|
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
|
|
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
|
|
; CHECK-BE-P8-NEXT: blr
|
|
;
|
|
; CHECK-BE-P9-LABEL: test2:
|
|
; CHECK-BE-P9: # %bb.0: # %entry
|
|
; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1
|
|
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
|
|
; CHECK-BE-P9-NEXT: blr
|
|
entry:
|
|
; Signed conversion of the float argument to i32.
%conv = fptosi float %b to i32
|
|
; Replace element 3 of %a with the converted value; other elements are kept.
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
|
|
ret <4 x i32> %vecins
|
|
}
|
|
|
|
; test3: unsigned counterpart of the double case: converts the double
; argument %b to an unsigned 32-bit integer (fptoui) and inserts the result
; at element index 3 of the <4 x i32> argument %a.
; The check lines below expect the xscvdpuxws conversion instruction, followed
; by the per-CPU insertion sequence (stack round-trip plus vperm on Power7,
; vperm on Power8, xxinsertw on Power9).
define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
|
|
; CHECK-LE-P7-LABEL: test3:
|
|
; CHECK-LE-P7: # %bb.0: # %entry
|
|
; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-LE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha
|
|
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI2_0@toc@l
|
|
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
|
|
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
|
|
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P7-NEXT: blr
|
|
;
|
|
; CHECK-LE-P8-LABEL: test3:
|
|
; CHECK-LE-P8: # %bb.0: # %entry
|
|
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
|
|
; CHECK-LE-P8-NEXT: xscvdpuxws v4, f1
|
|
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
|
|
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P8-NEXT: blr
|
|
;
|
|
; CHECK-LE-P9-LABEL: test3:
|
|
; CHECK-LE-P9: # %bb.0: # %entry
|
|
; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
|
|
; CHECK-LE-P9-NEXT: blr
|
|
;
|
|
; CHECK-BE-P7-LABEL: test3:
|
|
; CHECK-BE-P7: # %bb.0: # %entry
|
|
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha
|
|
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l
|
|
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
|
|
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
|
|
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
|
|
; CHECK-BE-P7-NEXT: blr
|
|
;
|
|
; CHECK-BE-P8-LABEL: test3:
|
|
; CHECK-BE-P8: # %bb.0: # %entry
|
|
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
|
|
; CHECK-BE-P8-NEXT: xscvdpuxws v4, f1
|
|
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
|
|
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
|
|
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
|
|
; CHECK-BE-P8-NEXT: blr
|
|
;
|
|
; CHECK-BE-P9-LABEL: test3:
|
|
; CHECK-BE-P9: # %bb.0: # %entry
|
|
; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
|
|
; CHECK-BE-P9-NEXT: blr
|
|
entry:
|
|
; Unsigned conversion of the double argument to i32.
%conv = fptoui double %b to i32
|
|
; Replace element 3 of %a with the converted value; other elements are kept.
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
|
|
ret <4 x i32> %vecins
|
|
}
|
|
|
|
; test4: unsigned counterpart of the float case: converts the float argument
; %b to an unsigned 32-bit integer (fptoui) and inserts the result at element
; index 3 of the <4 x i32> argument %a.
; The check lines below expect the same xscvdpuxws conversion instruction as
; for a double source, followed by the per-CPU insertion sequence (stack
; round-trip plus vperm on Power7, vperm on Power8, xxinsertw on Power9).
define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
|
|
; CHECK-LE-P7-LABEL: test4:
|
|
; CHECK-LE-P7: # %bb.0: # %entry
|
|
; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-LE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha
|
|
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI3_0@toc@l
|
|
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
|
|
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
|
|
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P7-NEXT: blr
|
|
;
|
|
; CHECK-LE-P8-LABEL: test4:
|
|
; CHECK-LE-P8: # %bb.0: # %entry
|
|
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
|
|
; CHECK-LE-P8-NEXT: xscvdpuxws v4, f1
|
|
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
|
|
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
|
|
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
|
|
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
|
|
; CHECK-LE-P8-NEXT: blr
|
|
;
|
|
; CHECK-LE-P9-LABEL: test4:
|
|
; CHECK-LE-P9: # %bb.0: # %entry
|
|
; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
|
|
; CHECK-LE-P9-NEXT: blr
|
|
;
|
|
; CHECK-BE-P7-LABEL: test4:
|
|
; CHECK-BE-P7: # %bb.0: # %entry
|
|
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
|
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
|
|
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha
|
|
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l
|
|
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
|
|
; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
|
|
; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
|
|
; CHECK-BE-P7-NEXT: blr
|
|
;
|
|
; CHECK-BE-P8-LABEL: test4:
|
|
; CHECK-BE-P8: # %bb.0: # %entry
|
|
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
|
|
; CHECK-BE-P8-NEXT: xscvdpuxws v4, f1
|
|
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
|
|
; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
|
|
; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
|
|
; CHECK-BE-P8-NEXT: blr
|
|
;
|
|
; CHECK-BE-P9-LABEL: test4:
|
|
; CHECK-BE-P9: # %bb.0: # %entry
|
|
; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1
|
|
; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
|
|
; CHECK-BE-P9-NEXT: blr
|
|
entry:
|
|
; Unsigned conversion of the float argument to i32.
%conv = fptoui float %b to i32
|
|
; Replace element 3 of %a with the converted value; other elements are kept.
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 3
|
|
ret <4 x i32> %vecins
|
|
}
|