Update the code to create Trunc/Ext recipes directly in adjustRecipesForReductions instead of fixing it up later in fixReductions. This explicitly models the required conversions and also makes sure they are generated at the right place (instead of after the exit condition), hence the changes in a few tests.
61 lines
3.7 KiB
LLVM
61 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes=loop-vectorize -force-target-supports-scalable-vectors=true -scalable-vectorization=on -S | FileCheck %s
|
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
|
|
|
define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
|
|
; CHECK-LABEL: @reduction_add_trunc(
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
|
|
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
|
|
; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
|
|
; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr
|
|
; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr
|
|
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 8 x i32> [[TMP14]], [[TMP26]]
|
|
; CHECK-NEXT: [[TMP29:%.*]] = add <vscale x 8 x i32> [[TMP15]], [[TMP27]]
|
|
; CHECK-NEXT: [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]]
|
|
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}}
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP38]], [[TMP37]]
|
|
; CHECK-NEXT: [[TMP39:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[BIN_RDX]])
|
|
; CHECK-NEXT: [[TMP40:%.*]] = zext i8 [[TMP39]] to i32
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
%indvars.iv = phi i32 [ %indvars.iv.next, %loop ], [ 0, %entry ]
|
|
%sum.02p = phi i32 [ %l9, %loop ], [ 255, %entry ]
|
|
%sum.02 = and i32 %sum.02p, 255
|
|
%l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
|
|
%l3 = load i8, ptr %l2, align 4
|
|
%l3e = zext i8 %l3 to i32
|
|
%l9 = add i32 %sum.02, %l3e
|
|
%indvars.iv.next = add i32 %indvars.iv, 1
|
|
%exitcond = icmp eq i32 %indvars.iv.next, 256
|
|
br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
|
|
|
|
exit: ; preds = %loop
|
|
%sum.0.lcssa = phi i32 [ %l9, %loop ]
|
|
%ret = trunc i32 %sum.0.lcssa to i8
|
|
ret i8 %ret
|
|
}
|
|
|
|
!0 = distinct !{!0, !1, !2, !3, !4}
|
|
!1 = !{!"llvm.loop.vectorize.width", i32 8}
|
|
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
|
!3 = !{!"llvm.loop.interleave.count", i32 2}
|
|
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
|