Recommit with a fix for the use-after-free causing the revert. This reverts the revert commit f872043e055f4163c3c4b1b86ca0354490174987. Original commit message: Dropping disjoint from an OR may yield incorrect results, as some analysis may have converted it to an Add implicitly (e.g. SCEV used for dependence analysis). Instead, replace it with an equivalent Add. This is possible as all users of the disjoint OR only access lanes where the operands are disjoint or poison otherwise. Note that replacing all disjoint ORs with ADDs instead of dropping the flags is not strictly necessary. It is only needed for disjoint ORs that SCEV treated as ADDs, but those are not tracked. There are other places that may drop poison-generating flags; those likely need similar treatment. Fixes https://github.com/llvm/llvm-project/issues/81872 PR: https://github.com/llvm/llvm-project/pull/83821
108 lines
5.5 KiB
LLVM
108 lines
5.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; PR 81872 explains the issue.
|
|
|
|
; If we vectorize, we have a miscompile where the array IV, and thereby the value stored in
|
|
; (arr[99], arr[98]), is calculated incorrectly, since the disjoint or was only disjoint
|
|
; because of dominating conditions. Merely dropping the disjoint flag to avoid poison still
|
|
; changes the behaviour, since the or is then no longer equivalent to the add.
|
|
;
|
|
; @test walks %iv downwards from 99 and leaves the loop once the decremented
; value equals 90. On iterations where the low bit of %iv is clear it stores 1
; to the i64 element of %arr at index (%iv | 1). The `or disjoint` lives in
; bb18, which is only entered after the (%iv & 1) == 0 test in loop.header.
; The autogenerated check lines below expect the vector body to form that index
; with `add i64 ..., 1`, while the scalar loop reached via scalar.ph keeps the
; `or disjoint`.
define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
|
|
; CHECK-LABEL: define void @test(
|
|
; CHECK-SAME: ptr noundef align 8 dereferenceable_or_null(16) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: bb5:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF0:![0-9]+]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 99, i64 98, i64 97, i64 96>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 99, [[INDEX]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 8, i64 8, i64 8, i64 8>
|
|
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
|
|
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
|
|
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ]
|
|
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
|
|
; CHECK: loop.header:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
|
|
; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
|
|
; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
|
|
; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]]
|
|
; CHECK: bb18:
|
|
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]]
|
|
; CHECK-NEXT: store i64 1, ptr [[GETELEMENTPTR19]], align 8
|
|
; CHECK-NEXT: br label [[LOOP_LATCH]]
|
|
; CHECK: loop.latch:
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
|
|
; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: bb6:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb5:
|
|
br label %loop.header
|
|
|
|
loop.header: ; preds = %loop.latch, %bb5
|
|
; Induction variable: starts at 99 when entered from %bb5, then takes %iv.next.
%iv = phi i64 [ 99, %bb5 ], [ %iv.next, %loop.latch ]
|
|
; Isolate the low bit of %iv; bb18 is taken only when that bit is zero.
%and = and i64 %iv, 1
|
|
%icmp17 = icmp eq i64 %and, 0
|
|
br i1 %icmp17, label %bb18, label %loop.latch, !prof !21
|
|
|
|
bb18: ; preds = %loop.header
|
|
; Reached only when (%iv & 1) == 0, so %iv and the constant 1 share no set bits
; here; that dominating condition is what makes the disjoint flag hold.
%or = or disjoint i64 %iv, 1
|
|
; Store 1 to the i64 element of %arr at index %or.
%getelementptr19 = getelementptr inbounds i64, ptr %arr, i64 %or
|
|
store i64 1, ptr %getelementptr19, align 8
|
|
br label %loop.latch
|
|
|
|
loop.latch: ; preds = %bb18, %loop.header
|
|
; Step %iv down by one; exit to bb6 once the decremented value equals 90.
%iv.next = add nsw i64 %iv, -1
|
|
%icmp22 = icmp eq i64 %iv.next, 90
|
|
br i1 %icmp22, label %bb6, label %loop.header, !prof !22
|
|
|
|
bb6:
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0 (attached to @test): selects the haswell target CPU and
; enables the avx2 target feature.
attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
|
|
|
|
; NOTE(review): !4, !10, !16 and !17 are not referenced by any instruction
; visible in this file; presumably leftovers from test reduction - confirm
; before removing.
!4 = !{}
|
|
!10 = !{i32 1}
|
|
!16 = !{i64 864}
|
|
!17 = !{i64 8}
|
|
; !21: branch weights used by the conditional branch in loop.header (1 : 1).
!21 = !{!"branch_weights", i32 1, i32 1}
|
|
; !22: branch weights used by the loop-exit branch in loop.latch (1 : 95).
!22 = !{!"branch_weights", i32 1, i32 95}
|
|
|
|
|
|
;.
|
|
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
|
|
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23}
|
|
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
|
|
; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1}
|
|
; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
|
|
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
|
|
;.
|