Kai Nacke 5403c59c60 [PPC] Opaque pointer migration, part 2.
The LIT test cases were migrated with the script provided by
Nikita Popov. Due to the size of the change it is split into
several parts.

Reviewed By: nemanja, nikic

Differential Revision: https://reviews.llvm.org/D135474
2022-10-11 17:24:06 +00:00

146 lines
6.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-postra-bias-addi=false < %s |\
; RUN: FileCheck -check-prefix=CHECK-P9-NO-HEURISTIC %s
; Packed (no padding) layout of the @scalars global: 16 leading bytes, one
; double at byte offset 16, then 152 trailing bytes (176 bytes in total).
%_type_of_scalars = type <{ [16 x i8], double, [152 x i8] }>
; Element types of the arrays passed as %.x and %.a: a packed struct wrapping
; a single double, so each element is 8 bytes.
%_elem_type_of_x = type <{ double }>
%_elem_type_of_a = type <{ double }>
; Zero-initialized common global, 16-byte aligned; @test reads only field 1
; (the double) from it.
@scalars = common dso_local local_unnamed_addr global %_type_of_scalars zeroinitializer, align 16
; @test(%.x, %.a, %.n): unrolled vector loop that multiplies blocks of doubles
; read through %.a by the scalar stored in field 1 of @scalars and writes
; selected products through %.x.
;   %.x - destination array (noalias)
;   %.a - source array
;   %.n - pointer to an i64 element count; only the count rounded down to a
;         multiple of 32 is used (and i64 %_val_n_, -32)
; Each iteration covers 32 doubles (256 bytes), matching the two
; "addi ..., 256" pointer bumps in the expected assembly. The expected
; instruction sequences listed for the two llc invocations are identical in
; this file.
define dso_local void @test(ptr noalias %.x, ptr %.a, ptr noalias %.n) {
; CHECK-P9-LABEL: test:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: ld 5, 0(5)
; CHECK-P9-NEXT: addis 6, 2, scalars@toc@ha
; CHECK-P9-NEXT: addi 6, 6, scalars@toc@l
; CHECK-P9-NEXT: rldicr 5, 5, 0, 58
; CHECK-P9-NEXT: addi 6, 6, 16
; CHECK-P9-NEXT: addi 5, 5, -32
; CHECK-P9-NEXT: lxvdsx 0, 0, 6
; CHECK-P9-NEXT: rldicl 5, 5, 59, 5
; CHECK-P9-NEXT: addi 5, 5, 1
; CHECK-P9-NEXT: mtctr 5
; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: lxv 1, 16(4)
; CHECK-P9-NEXT: lxv 2, 0(4)
; CHECK-P9-NEXT: lxv 3, 48(4)
; CHECK-P9-NEXT: lxv 4, 32(4)
; CHECK-P9-NEXT: xvmuldp 2, 2, 0
; CHECK-P9-NEXT: lxv 5, 240(4)
; CHECK-P9-NEXT: lxv 6, 224(4)
; CHECK-P9-NEXT: xvmuldp 1, 1, 0
; CHECK-P9-NEXT: xvmuldp 4, 4, 0
; CHECK-P9-NEXT: xvmuldp 3, 3, 0
; CHECK-P9-NEXT: xvmuldp 6, 6, 0
; CHECK-P9-NEXT: xvmuldp 5, 5, 0
; CHECK-P9-NEXT: addi 4, 4, 256
; CHECK-P9-NEXT: stxv 1, 16(3)
; CHECK-P9-NEXT: stxv 2, 0(3)
; CHECK-P9-NEXT: stxv 3, 48(3)
; CHECK-P9-NEXT: stxv 4, 32(3)
; CHECK-P9-NEXT: stxv 5, 240(3)
; CHECK-P9-NEXT: stxv 6, 224(3)
; CHECK-P9-NEXT: addi 3, 3, 256
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %return.block
; CHECK-P9-NEXT: blr
;
; CHECK-P9-NO-HEURISTIC-LABEL: test:
; CHECK-P9-NO-HEURISTIC: # %bb.0: # %entry
; CHECK-P9-NO-HEURISTIC-NEXT: ld 5, 0(5)
; CHECK-P9-NO-HEURISTIC-NEXT: addis 6, 2, scalars@toc@ha
; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, scalars@toc@l
; CHECK-P9-NO-HEURISTIC-NEXT: rldicr 5, 5, 0, 58
; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, 16
; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, -32
; CHECK-P9-NO-HEURISTIC-NEXT: lxvdsx 0, 0, 6
; CHECK-P9-NO-HEURISTIC-NEXT: rldicl 5, 5, 59, 5
; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, 1
; CHECK-P9-NO-HEURISTIC-NEXT: mtctr 5
; CHECK-P9-NO-HEURISTIC-NEXT: .p2align 4
; CHECK-P9-NO-HEURISTIC-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NO-HEURISTIC-NEXT: #
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 1, 16(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 2, 0(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 3, 48(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 4, 32(4)
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 2, 2, 0
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 5, 240(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 6, 224(4)
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 1, 1, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 4, 4, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 3, 3, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 6, 6, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 5, 5, 0
; CHECK-P9-NO-HEURISTIC-NEXT: addi 4, 4, 256
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 1, 16(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 2, 0(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 3, 48(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 4, 32(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 5, 240(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 6, 224(3)
; CHECK-P9-NO-HEURISTIC-NEXT: addi 3, 3, 256
; CHECK-P9-NO-HEURISTIC-NEXT: bdnz .LBB0_1
; CHECK-P9-NO-HEURISTIC-NEXT: # %bb.2: # %return.block
; CHECK-P9-NO-HEURISTIC-NEXT: blr
entry:
; Bias both array bases back by one element (index -1); the loop indexes them
; with (%index | 1), which cancels the bias.
%x_rvo_based_addr_3 = getelementptr inbounds [0 x %_elem_type_of_x], ptr %.x, i64 0, i64 -1
%a_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_a], ptr %.a, i64 0, i64 -1
%_val_n_ = load i64, ptr %.n, align 8
; Scale factor: the double in field 1 of @scalars (byte offset 16).
%_val_c1_ = load double, ptr getelementptr inbounds (%_type_of_scalars, ptr @scalars, i64 0, i32 1), align 16
; Loop bound: the element count with its low five bits cleared.
%n.vec = and i64 %_val_n_, -32
; Broadcast the scale factor to all four lanes (lane 0 insert + zero mask).
%broadcast.splatinsert26 = insertelement <4 x double> undef, double %_val_c1_, i32 0
%broadcast.splat27 = shufflevector <4 x double> %broadcast.splatinsert26, <4 x double> undef, <4 x i32> zeroinitializer
; The loop is entered unconditionally and tested at the bottom, so the body
; executes at least once.
br label %vector.body
vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
; %index starts at 0 and advances by 32, so its low bit is clear and
; "or 1" yields %index + 1.
%offset.idx = or i64 %index, 1
; %0 = destination address, %1 = source address for this iteration.
%0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
%1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
; Eight <4 x double> loads at element offsets 0, 4, ..., 28 from %1.
%wide.load = load <4 x double>, ptr %1, align 8
%2 = getelementptr double, ptr %1, i64 4
%wide.load19 = load <4 x double>, ptr %2, align 8
%3 = getelementptr double, ptr %1, i64 8
%wide.load20 = load <4 x double>, ptr %3, align 8
%4 = getelementptr double, ptr %1, i64 12
%wide.load21 = load <4 x double>, ptr %4, align 8
%5 = getelementptr double, ptr %1, i64 16
%wide.load22 = load <4 x double>, ptr %5, align 8
%6 = getelementptr double, ptr %1, i64 20
%wide.load23 = load <4 x double>, ptr %6, align 8
%7 = getelementptr double, ptr %1, i64 24
%wide.load24 = load <4 x double>, ptr %7, align 8
%8 = getelementptr double, ptr %1, i64 28
%wide.load25 = load <4 x double>, ptr %8, align 8
; Multiply every loaded vector by the broadcast scale factor.
%9 = fmul fast <4 x double> %wide.load, %broadcast.splat27
%10 = fmul fast <4 x double> %wide.load19, %broadcast.splat27
%11 = fmul fast <4 x double> %wide.load20, %broadcast.splat27
%12 = fmul fast <4 x double> %wide.load21, %broadcast.splat27
%13 = fmul fast <4 x double> %wide.load22, %broadcast.splat27
%14 = fmul fast <4 x double> %wide.load23, %broadcast.splat27
%15 = fmul fast <4 x double> %wide.load24, %broadcast.splat27
%16 = fmul fast <4 x double> %wide.load25, %broadcast.splat27
; Only %9, %10 and %16 are stored (element offsets 0, 4 and 28 from %0). This
; is consistent with the expected assembly, which has six lxv/stxv pairs at
; byte offsets 0, 16, 32, 48, 224 and 240.
store <4 x double> %9, ptr %0, align 8
%17 = getelementptr double, ptr %0, i64 4
store <4 x double> %10, ptr %17, align 8
; NOTE(review): %18-%22 are computed but never used, and the products
; %11-%15 are never stored - confirm this reduction is intentional.
%18 = getelementptr double, ptr %0, i64 8
%19 = getelementptr double, ptr %0, i64 12
%20 = getelementptr double, ptr %0, i64 16
%21 = getelementptr double, ptr %0, i64 20
%22 = getelementptr double, ptr %0, i64 24
%23 = getelementptr double, ptr %0, i64 28
store <4 x double> %16, ptr %23, align 8
; Advance by 32 elements; exit only when the index equals %n.vec exactly.
%index.next = add i64 %index, 32
%cm = icmp eq i64 %index.next, %n.vec
br i1 %cm, label %return.block, label %vector.body
return.block:
ret void
}