llvm-project/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
Florian Hahn fd31112634
[VPlan] Insert Trunc/Exts for reductions directly in VPlan.
Update the code to create Trunc/Ext recipes directly in
adjustRecipesForReductions instead of fixing it up later in
fixReductions.

This explicitly models the required conversions and also makes sure they
are generated at the right place (instead of after the exit condition),
hence the changes in a few tests.
2023-10-17 19:17:40 +01:00

61 lines
3.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-target-supports-scalable-vectors=true -scalable-vectorization=on -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_add_trunc(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr
; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
; CHECK-NEXT: [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 8 x i32> [[TMP14]], [[TMP26]]
; CHECK-NEXT: [[TMP29:%.*]] = add <vscale x 8 x i32> [[TMP15]], [[TMP27]]
; CHECK-NEXT: [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
; CHECK-NEXT: [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
; CHECK-NEXT: [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
; CHECK-NEXT: [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]]
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}}
; CHECK: middle.block:
; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP38]], [[TMP37]]
; CHECK-NEXT: [[TMP39:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[BIN_RDX]])
; CHECK-NEXT: [[TMP40:%.*]] = zext i8 [[TMP39]] to i32
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%indvars.iv = phi i32 [ %indvars.iv.next, %loop ], [ 0, %entry ]
%sum.02p = phi i32 [ %l9, %loop ], [ 255, %entry ]
%sum.02 = and i32 %sum.02p, 255
%l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
%l3 = load i8, ptr %l2, align 4
%l3e = zext i8 %l3 to i32
%l9 = add i32 %sum.02, %l3e
%indvars.iv.next = add i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 256
br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
exit: ; preds = %loop
%sum.0.lcssa = phi i32 [ %l9, %loop ]
%ret = trunc i32 %sum.0.lcssa to i8
ret i8 %ret
}
!0 = distinct !{!0, !1, !2, !3, !4}
!1 = !{!"llvm.loop.vectorize.width", i32 8}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!3 = !{!"llvm.loop.interleave.count", i32 2}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}