This reverts commit e4ea0997486000b460c4875a00301b73b3c0d6a7.
The recommit fixes a reported crash by adding a missing check to make
sure the cast recipes are only introduced when vectorizing.
Test coverage added in 3cac608fbd0811b2f5c59c6e13148162ccd8543e.
Original commit message:
Update the code to create Trunc/Ext recipes directly in
adjustRecipesForReductions instead of fixing it up later in
fixReductions.
This explicitly models the required conversions and also makes sure they
are generated at the right place (instead of after the exit condition),
hence the changes in a few tests.
61 lines
3.7 KiB
LLVM
61 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes=loop-vectorize -force-target-supports-scalable-vectors=true -scalable-vectorization=on -S | FileCheck %s
|
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
|
|
|
define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
|
|
; CHECK-LABEL: @reduction_add_trunc(
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
|
|
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
|
|
; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
|
|
; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr
|
|
; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr
|
|
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 8 x i32> [[TMP14]], [[TMP26]]
|
|
; CHECK-NEXT: [[TMP29:%.*]] = add <vscale x 8 x i32> [[TMP15]], [[TMP27]]
|
|
; CHECK-NEXT: [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]]
|
|
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}}
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>
|
|
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP38]], [[TMP37]]
|
|
; CHECK-NEXT: [[TMP39:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[BIN_RDX]])
|
|
; CHECK-NEXT: [[TMP40:%.*]] = zext i8 [[TMP39]] to i32
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
%indvars.iv = phi i32 [ %indvars.iv.next, %loop ], [ 0, %entry ]
|
|
%sum.02p = phi i32 [ %l9, %loop ], [ 255, %entry ]
|
|
%sum.02 = and i32 %sum.02p, 255
|
|
%l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
|
|
%l3 = load i8, ptr %l2, align 4
|
|
%l3e = zext i8 %l3 to i32
|
|
%l9 = add i32 %sum.02, %l3e
|
|
%indvars.iv.next = add i32 %indvars.iv, 1
|
|
%exitcond = icmp eq i32 %indvars.iv.next, 256
|
|
br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
|
|
|
|
exit: ; preds = %loop
|
|
%sum.0.lcssa = phi i32 [ %l9, %loop ]
|
|
%ret = trunc i32 %sum.0.lcssa to i8
|
|
ret i8 %ret
|
|
}
|
|
|
|
!0 = distinct !{!0, !1, !2, !3, !4}
|
|
!1 = !{!"llvm.loop.vectorize.width", i32 8}
|
|
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
|
!3 = !{!"llvm.loop.interleave.count", i32 2}
|
|
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
|