llvm-project/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
Florian Hahn c071dba1a3
[LV] update hexagon test to use load results.
The current version of the test doesn't use any of the loads, so they
can be removed together with the mask of the interleave group.

Use some loaded values and store them, to prevent the mask from being
optimized away.
2023-08-22 20:20:58 +01:00

; RUN: opt -passes=loop-vectorize -hexagon-autohvx=1 -force-vector-width=64 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
; Test for PR45572.
; Check that interleave groups and decisions based on them are correctly
; invalidated with tail-folding on platforms where masked interleaved accesses
; are disabled.
; Make sure a vector body has been created, 64-element vectors are used, and a block predicate has been computed.
; Also make sure the loads are not widened.
; CHECK-LABEL: @test1
; CHECK: vector.body:
; CHECK: icmp ule <64 x i32> %vec.ind
; CHECK-NOT: load <{{.*}} x i32>
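;
; A rough C-level picture of the loop in @test1 (a sketch added for context; the
; function shape and variable names are illustrative and not part of the original
; test). Each iteration loads seven consecutive elements starting at arg[7*i + 1],
; forming a single strided (interleave) group with factor 7, but only the first and
; last loaded values are used:
;
;   void test1(int *arg, int N) {
;     int tmp;
;     for (int i = 0; i != N; ++i) {
;       int base = 7 * i + 1;
;       tmp = arg[base] + arg[base + 6]; // the IR also loads arg[base+1..base+5]
;     }
;   }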
define void @test1(ptr %arg, i32 %N) #0 {
entry:
%tmp = alloca i32
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
%idx.mul = mul nuw nsw i32 %iv, 7
%idx.start = add nuw nsw i32 %idx.mul, 1
%tmp6 = getelementptr inbounds i32, ptr %arg, i32 %idx.start
%tmp7 = load i32, ptr %tmp6, align 4
%tmp8 = add nuw nsw i32 %idx.start, 1
%tmp9 = getelementptr inbounds i32, ptr %arg, i32 %tmp8
%tmp10 = load i32, ptr %tmp9, align 4
%tmp11 = add nuw nsw i32 %idx.start, 2
%tmp12 = getelementptr inbounds i32, ptr %arg, i32 %tmp11
%tmp13 = load i32, ptr %tmp12, align 4
%tmp14 = add nuw nsw i32 %idx.start, 3
%tmp15 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
%tmp16 = load i32, ptr %tmp15, align 4
%tmp18 = add nuw nsw i32 %idx.start, 4
%tmp19 = getelementptr inbounds i32, ptr %arg, i32 %tmp18
%tmp20 = load i32, ptr %tmp19, align 4
%tmp21 = add nuw nsw i32 %idx.start, 5
%tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp21
%tmp23 = load i32, ptr %tmp22, align 4
%tmp25 = add nuw nsw i32 %idx.start, 6
%tmp26 = getelementptr inbounds i32, ptr %arg, i32 %tmp25
%tmp27 = load i32, ptr %tmp26, align 4
%add = add i32 %tmp7, %tmp27
store i32 %add, ptr %tmp, align 1
%iv.next = add nuw nsw i32 %iv, 1
%exit.cond = icmp eq i32 %iv.next, %N
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
; The loop below requires tail folding only because of its interleave groups with gaps:
; the trip count of 128 is a multiple of the forced vector width, and optsize rules out
; the scalar epilogue the gaps would otherwise require.
; Make sure the loads are not widened.
; CHECK-LABEL: @test2
; CHECK: vector.body:
; CHECK-NOT: load <{{.*}} x i32>
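;
; A rough C-level picture of the loop in @test2 (again a sketch; names are
; illustrative). Each iteration loads four consecutive elements starting at arg[5*i],
; so the interleave group has stride 5 with a gap at offset 4, and the trip count is
; fixed at 128:
;
;   void test2(int *arg) {
;     int tmp;
;     for (int i = 0; i != 128; ++i) {
;       int base = 5 * i;
;       tmp = arg[base] + arg[base + 3]; // the IR also loads arg[base+1] and arg[base+2]
;     }
;   }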
define void @test2(ptr %arg) #1 {
entry:
%tmp = alloca i32
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
%idx.start = mul nuw nsw i32 %iv, 5
%tmp6 = getelementptr inbounds i32, ptr %arg, i32 %idx.start
%tmp7 = load i32, ptr %tmp6, align 4
%tmp8 = add nuw nsw i32 %idx.start, 1
%tmp9 = getelementptr inbounds i32, ptr %arg, i32 %tmp8
%tmp10 = load i32, ptr %tmp9, align 4
%tmp11 = add nuw nsw i32 %idx.start, 2
%tmp12 = getelementptr inbounds i32, ptr %arg, i32 %tmp11
%tmp13 = load i32, ptr %tmp12, align 4
%tmp14 = add nuw nsw i32 %idx.start, 3
%tmp15 = getelementptr inbounds i32, ptr %arg, i32 %tmp14
%tmp16 = load i32, ptr %tmp15, align 4
%add = add i32 %tmp7, %tmp16
store i32 %add, ptr %tmp, align 1
%iv.next = add nuw nsw i32 %iv, 1
%exit.cond = icmp eq i32 %iv.next, 128
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
attributes #0 = { "target-features"="+hvx,+hvx-length128b" }
attributes #1 = { optsize "target-features"="+hvx,+hvx-length128b" }