llvm-project/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
Florian Hahn c9dd14d1d4
[VPlan] Compute interleave count for VPlan. (#149702)
Move selectInterleaveCount to LoopVectorizationPlanner and retrieve some
information directly from VPlan. Register pressure was already computed
for a VPlan, and with this patch we now also check for reductions
directly on VPlan, as well as checking how many load and store
operations remain in the loop.

This should be mostly NFC, but we may compute slightly different
interleave counts in some edge cases, e.g. where dead loads have been
removed. This shouldn't happen in practice, and the patch doesn't cause
changes across a large test corpus on AArch64.

Computing the interleave count based on VPlan allows for making better
decisions in presence of VPlan optimizations, for example when
operations on interleave groups are narrowed.

Note that there are a few test changes for tests that were still
checking the legacy cost-model output when it was computed in
selectInterleaveCount.

PR: https://github.com/llvm/llvm-project/pull/149702
2025-08-05 09:42:55 +01:00

33 lines
1.0 KiB
LLVM

; REQUIRES: asserts
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -passes=loop-vectorize \
; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
; RUN: -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
; RUN: FileCheck %s
;
; Check that a scalarized load/store does not get a cost for inserts/
; extracts, since z13 supports element load/store.
; Test kernel: loads the i32 at %data[%i], adds 1, and stores the result back
; to the same address. %i starts at 0 and advances by 2 per iteration, so only
; every other i32 element is touched. The loop repeats while the signed
; comparison %i.next < %n holds.
define void @fun(ptr %data, i64 %n) {
entry:
br label %for.body
for.body:
; Induction variable; incremented by 2 further down.
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; Address of element %i, shared by the load and the store below.
%tmp0 = getelementptr inbounds i32, ptr %data, i64 %i
%tmp1 = load i32, ptr %tmp0, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, ptr %tmp0, align 4
; Step of 2 (not 1): successive iterations access non-adjacent i32 elements.
; NOTE(review): presumably this stride, together with interleaved accesses
; being disabled on the opt command line, is what makes the vectorizer
; scalarize the load and store - confirm.
%i.next = add nuw nsw i64 %i, 2
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end:
ret void
; Expected debug output, in this order: both memory operations are reported as
; scalarized, then each REPLICATE recipe is costed at 4 for VF 4.
; NOTE(review): presumably 4 means one element load/store per lane with no
; added insert/extract overhead - confirm against the SystemZ cost model.
; CHECK: LV: Scalarizing: %tmp1 = load i32, ptr %tmp0, align 4
; CHECK: LV: Scalarizing: store i32 %tmp2, ptr %tmp0, align 4
; CHECK: Cost of 4 for VF 4: REPLICATE ir<%tmp1> = load ir<%tmp0>
; CHECK: Cost of 4 for VF 4: REPLICATE store ir<%tmp2>, ir<%tmp0>
}