
These tests rely on SCEV recognizing an "or" with no common bits as an "add". Add the disjoint flag to the relevant or instructions in preparation for switching SCEV to use the flag instead of the ValueTracking query. The IR with the disjoint flag matches what InstCombine would produce.
218 lines
8.2 KiB
LLVM
218 lines
8.2 KiB
LLVM
; The checks below match text printed by -debug-only, which is why this test
; is restricted to assertion-enabled builds.
; REQUIRES: asserts
|
|
; First invocation: interleaved accesses enabled, masked interleaved accesses
; left at their default; output is checked with the STRIDED_UNMASKED prefix.
; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_UNMASKED
|
|
; Second invocation: same options plus -enable-masked-interleaved-mem-accesses;
; output is checked with the STRIDED_MASKED prefix.
; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_MASKED
|
|
|
|
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
|
|
|
|
; We test here that the loop-vectorizer forms an interleave-group from
|
|
; predicated memory accesses only if they are both in the same (predicated)
|
|
; block (first scenario below).
|
|
; If the accesses are not in the same predicated block, an interleave-group
|
|
; is not formed (scenarios 2,3 below).
|
|
|
|
; Scenario 1: Check the case where it is legal to create masked interleave-
|
|
; groups. Altogether two groups are created (one for loads and one for stores)
|
|
; when masked-interleaved-accesses are enabled. When masked-interleaved-accesses
|
|
; are disabled we do not create any interleave-group.
|
|
;
|
|
; void masked_strided1(const unsigned char* restrict p,
|
|
; unsigned char* restrict q,
|
|
; unsigned char guard) {
|
|
; for(ix=0; ix < 1024; ++ix) {
|
|
; if (ix > guard) {
|
|
; char left = p[2*ix];
|
|
; char right = p[2*ix + 1];
|
|
; char max = max(left, right);
|
|
; q[2*ix] = max;
|
|
; q[2*ix+1] = 0 - max;
|
|
; }
|
|
; }
|
|
;}
|
|
|
|
|
|
; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided1'
|
|
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
|
|
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
|
|
|
|
; STRIDED_MASKED: LV: Checking a loop in 'masked_strided1'
|
|
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
|
|
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 %{{.*}}, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: LV: Inserted: store i8 %{{.*}}, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: into the interleave group with store i8 %{{.*}}, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: %{{.*}} = load i8, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: LV: Inserted: %{{.*}} = load i8, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: into the interleave group with %{{.*}} = load i8, ptr %{{.*}}, align 1
|
|
|
|
; Scenario 2: Check the case where it is illegal to create a masked interleave-
|
|
; group because the first access is predicated, and the second isn't.
|
|
; We therefore create a separate interleave-group with gaps for each of the
|
|
; stores (if masked-interleaved-accesses are enabled).
|
|
; If masked-interleaved-accesses is not enabled we create only one interleave
|
|
; group of stores (for the non-predicated store) and it is later invalidated
|
|
; due to gaps.
|
|
;
|
|
; void masked_strided2(const unsigned char* restrict p,
|
|
; unsigned char* restrict q,
|
|
; unsigned char guard1,
|
|
; unsigned char guard2) {
|
|
; for(ix=0; ix < 1024; ++ix) {
|
|
; if (ix > guard1) {
|
|
; q[2*ix] = 1;
|
|
; }
|
|
; q[2*ix+1] = 2;
|
|
; }
|
|
;}
|
|
|
|
; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided2'
|
|
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
|
|
; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
|
|
; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
|
|
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
|
|
|
|
; STRIDED_MASKED: LV: Checking a loop in 'masked_strided2'
|
|
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
|
|
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
|
|
|
|
|
|
; Scenario 3: Check the case where it is illegal to create a masked interleave-
|
|
; group because the two accesses are in separate predicated blocks.
|
|
; We therefore create a separate interleave-group with gaps for each of the accesses.
|
|
; If masked-interleaved-accesses is not enabled we don't create any interleave
|
|
; group because all accesses are predicated.
|
|
;
|
|
; void masked_strided3(const unsigned char* restrict p,
|
|
; unsigned char* restrict q,
|
|
; unsigned char guard1,
|
|
; unsigned char guard2) {
|
|
; for(ix=0; ix < 1024; ++ix) {
|
|
; if (ix > guard1) {
|
|
; q[2*ix] = 1;
|
|
; }
|
|
; if (ix > guard2) {
|
|
; q[2*ix+1] = 2;
|
|
; }
|
|
; }
|
|
;}
|
|
|
|
|
|
; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided3'
|
|
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
|
|
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
|
|
|
|
; STRIDED_MASKED: LV: Checking a loop in 'masked_strided3'
|
|
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
|
|
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
|
|
; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
|
|
|
|
|
|
; ModuleID = 'test.c'
|
|
; Module-level settings. The datalayout string is the same one already declared
; near the top of this file (32-bit pointers via p:32:32), and the triple
; selects an i386 Linux target.
source_filename = "test.c"
|
|
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
|
|
target triple = "i386-unknown-linux-gnu"
|
|
|
|
; masked_strided1: for ix in [0, 1024), and only when ix > zext(guard), load
; the byte pair p[2*ix] / p[2*ix+1], store their signed maximum to q[2*ix] and
; the negated maximum to q[2*ix+1]. Both loads and both stores sit in the same
; guarded block.
define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
  %guard.wide = zext i8 %guard to i32
  br label %loop

loop:
  %ix = phi i32 [ 0, %entry ], [ %ix.next, %latch ]
  %past.guard = icmp ugt i32 %ix, %guard.wide
  br i1 %past.guard, label %guarded, label %latch

guarded:
  ; %even = 2 * ix, so its low bit is clear.
  %even = shl nuw nsw i32 %ix, 1
  %p.even = getelementptr inbounds i8, ptr %p, i32 %even
  %left = load i8, ptr %p.even, align 1
  ; With no common bits between the operands (hence "disjoint"), this "or"
  ; computes %even + 1.
  %odd = or disjoint i32 %even, 1
  %p.odd = getelementptr inbounds i8, ptr %p, i32 %odd
  %right = load i8, ptr %p.odd, align 1
  ; Signed max(left, right).
  %left.lt.right = icmp slt i8 %left, %right
  %max = select i1 %left.lt.right, i8 %right, i8 %left
  %q.even = getelementptr inbounds i8, ptr %q, i32 %even
  store i8 %max, ptr %q.even, align 1
  %neg.max = sub i8 0, %max
  %q.odd = getelementptr inbounds i8, ptr %q, i32 %odd
  store i8 %neg.max, ptr %q.odd, align 1
  br label %latch

latch:
  %ix.next = add nuw nsw i32 %ix, 1
  %done = icmp eq i32 %ix.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
|
|
|
|
|
; masked_strided2: for ix in [0, 1024), store 1 to q[2*ix] on every iteration
; and store 2 to q[2*ix+1] only when ix > zext(guard). %p is not used.
; NOTE(review): the C sketch in the Scenario 2 comment earlier in this file
; guards the q[2*ix] store and lists two guard parameters; the IR here has a
; single guard and guards the q[2*ix+1] store instead - confirm which is
; intended.
define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
  %guard.wide = zext i8 %guard to i32
  br label %loop

loop:
  %ix = phi i32 [ 0, %entry ], [ %ix.next, %latch ]
  ; Unconditional store to the even slot.
  %even = shl nuw nsw i32 %ix, 1
  %q.even = getelementptr inbounds i8, ptr %q, i32 %even
  store i8 1, ptr %q.even, align 1
  %past.guard = icmp ugt i32 %ix, %guard.wide
  br i1 %past.guard, label %guarded, label %latch

guarded:
  ; %even has a clear low bit, so the disjoint "or" computes %even + 1.
  %odd = or disjoint i32 %even, 1
  %q.odd = getelementptr inbounds i8, ptr %q, i32 %odd
  store i8 2, ptr %q.odd, align 1
  br label %latch

latch:
  %ix.next = add nuw nsw i32 %ix, 1
  %done = icmp eq i32 %ix.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
|
|
|
|
|
; masked_strided3: for ix in [0, 1024), store 1 to q[2*ix] when
; ix > zext(guard1) and, independently, store 2 to q[2*ix+1] when
; ix > zext(guard2). The two stores live in separate guarded blocks. %p is not
; used.
define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
entry:
  %guard1.wide = zext i8 %guard1 to i32
  %guard2.wide = zext i8 %guard2 to i32
  br label %loop

loop:
  %ix = phi i32 [ 0, %entry ], [ %ix.next, %latch ]
  ; %even = 2 * ix is computed once and used by both guarded blocks.
  %even = shl nuw nsw i32 %ix, 1
  %past.guard1 = icmp ugt i32 %ix, %guard1.wide
  br i1 %past.guard1, label %store.even, label %check.guard2

store.even:
  %q.even = getelementptr inbounds i8, ptr %q, i32 %even
  store i8 1, ptr %q.even, align 1
  br label %check.guard2

check.guard2:
  %past.guard2 = icmp ugt i32 %ix, %guard2.wide
  br i1 %past.guard2, label %store.odd, label %latch

store.odd:
  ; %even has a clear low bit, so the disjoint "or" computes %even + 1.
  %odd = or disjoint i32 %even, 1
  %q.odd = getelementptr inbounds i8, ptr %q, i32 %odd
  store i8 2, ptr %q.odd, align 1
  br label %latch

latch:
  %ix.next = add nuw nsw i32 %ix, 1
  %done = icmp eq i32 %ix.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
|
|
|
; Attribute group #0, referenced by all three function definitions in this
; file: sets the target feature string (fxsr, mmx, sse, sse2, x87).
attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }
|