Nikita Popov 90ba33099c
[InstCombine] Canonicalize constant GEPs to i8 source element type (#68882)
This patch canonicalizes getelementptr instructions with constant
indices to use the `i8` source element type. This makes it easier for
optimizations to recognize that two GEPs are identical, because they
don't need to see past many different ways to express the same offset.

This is a first step towards
https://discourse.llvm.org/t/rfc-replacing-getelementptr-with-ptradd/68699.
This is limited to constant GEPs only for now, as they have a clear
canonical form, while we're not yet sure how exactly to deal with
variable indices.

The test llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll gives
two representative examples of the kind of optimization improvement we
expect from this change. In the first test SimplifyCFG can now realize
that all switch branches are actually the same. In the second test it
can convert it into simple arithmetic. These are representative of
common optimization failures we see in Rust.

Fixes https://github.com/llvm/llvm-project/issues/69841.
2024-01-24 15:25:29 +01:00

289 lines
18 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s -S -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
; CHECK-LABEL: @arm_abs_q7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32
; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32
; CHECK-NEXT: [[CMP_NOT19:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -16
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[N_VEC]]
; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 15
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
; CHECK-NEXT: [[TMP3:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP4]]
; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[NEXT_GEP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PSRC_ADDR_022:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[BLKCNT_021:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[PDST_ADDR_020:%.*]] = phi ptr [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_022]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[PSRC_ADDR_022]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[TMP7]], 0
; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i8 [[TMP7]], -128
; CHECK-NEXT: [[SUB:%.*]] = sub i8 0, [[TMP7]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP5]], i8 127, i8 [[SUB]]
; CHECK-NEXT: [[COND11:%.*]] = select i1 [[CMP1]], i8 [[TMP7]], i8 [[COND]]
; CHECK-NEXT: [[INCDEC_PTR13]] = getelementptr inbounds i8, ptr [[PDST_ADDR_020]], i32 1
; CHECK-NEXT: store i8 [[COND11]], ptr [[PDST_ADDR_020]], align 1
; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_021]], -1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
entry:
%cmp.not19 = icmp eq i32 %blockSize, 0
br i1 %cmp.not19, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%pSrc.addr.022 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
%blkCnt.021 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
%pDst.addr.020 = phi ptr [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ]
%incdec.ptr = getelementptr inbounds i8, ptr %pSrc.addr.022, i32 1
%0 = load i8, ptr %pSrc.addr.022, align 1
%cmp1 = icmp sgt i8 %0, 0
%cmp5 = icmp eq i8 %0, -128
%sub = sub i8 0, %0
%cond = select i1 %cmp5, i8 127, i8 %sub
%cond11 = select i1 %cmp1, i8 %0, i8 %cond
%incdec.ptr13 = getelementptr inbounds i8, ptr %pDst.addr.020, i32 1
store i8 %cond11, ptr %pDst.addr.020, align 1
%dec = add i32 %blkCnt.021, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret void
}
define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
; CHECK-LABEL: @arm_abs_q15(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32
; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32
; CHECK-NEXT: [[CMP_NOT20:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 1
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]]
; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 7
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[N_VEC]], 1
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP8]]
; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr [[NEXT_GEP7]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PSRC_ADDR_023:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[BLKCNT_022:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[PDST_ADDR_021:%.*]] = phi ptr [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_023]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[PSRC_ADDR_023]], align 2
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i16 [[TMP11]], 0
; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i16 [[TMP11]], -32768
; CHECK-NEXT: [[SUB:%.*]] = sub i16 0, [[TMP11]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP5]], i16 32767, i16 [[SUB]]
; CHECK-NEXT: [[COND11:%.*]] = select i1 [[CMP1]], i16 [[TMP11]], i16 [[COND]]
; CHECK-NEXT: [[INCDEC_PTR13]] = getelementptr inbounds i8, ptr [[PDST_ADDR_021]], i32 2
; CHECK-NEXT: store i16 [[COND11]], ptr [[PDST_ADDR_021]], align 2
; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_022]], -1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
entry:
%cmp.not20 = icmp eq i32 %blockSize, 0
br i1 %cmp.not20, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%pSrc.addr.023 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
%blkCnt.022 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
%pDst.addr.021 = phi ptr [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ]
%incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.023, i32 1
%0 = load i16, ptr %pSrc.addr.023, align 2
%cmp1 = icmp sgt i16 %0, 0
%cmp5 = icmp eq i16 %0, -32768
%sub = sub i16 0, %0
%cond = select i1 %cmp5, i16 32767, i16 %sub
%cond11 = select i1 %cmp1, i16 %0, i16 %cond
%incdec.ptr13 = getelementptr inbounds i16, ptr %pDst.addr.021, i32 1
store i16 %cond11, ptr %pDst.addr.021, align 2
%dec = add i32 %blkCnt.022, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret void
}
define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
; CHECK-LABEL: @arm_abs_q31(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32
; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32
; CHECK-NEXT: [[CMP_NOT14:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP_NOT14]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 4
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 2
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]]
; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[N_VEC]], 2
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP8]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[NEXT_GEP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PSRC_ADDR_017:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[BLKCNT_016:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[PDST_ADDR_015:%.*]] = phi ptr [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_017]], i32 4
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[PSRC_ADDR_017]], align 4
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP11]], -2147483648
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[TMP11]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP2]], i32 2147483647, i32 [[SUB]]
; CHECK-NEXT: [[COND6:%.*]] = select i1 [[CMP1]], i32 [[TMP11]], i32 [[COND]]
; CHECK-NEXT: [[INCDEC_PTR7]] = getelementptr inbounds i8, ptr [[PDST_ADDR_015]], i32 4
; CHECK-NEXT: store i32 [[COND6]], ptr [[PDST_ADDR_015]], align 4
; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_016]], -1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
entry:
%cmp.not14 = icmp eq i32 %blockSize, 0
br i1 %cmp.not14, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%pSrc.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
%blkCnt.016 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
%pDst.addr.015 = phi ptr [ %incdec.ptr7, %while.body ], [ %pDst, %while.body.preheader ]
%incdec.ptr = getelementptr inbounds i32, ptr %pSrc.addr.017, i32 1
%0 = load i32, ptr %pSrc.addr.017, align 4
%cmp1 = icmp sgt i32 %0, 0
%cmp2 = icmp eq i32 %0, -2147483648
%sub = sub nsw i32 0, %0
%cond = select i1 %cmp2, i32 2147483647, i32 %sub
%cond6 = select i1 %cmp1, i32 %0, i32 %cond
%incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.015, i32 1
store i32 %cond6, ptr %pDst.addr.015, align 4
%dec = add i32 %blkCnt.016, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret void
}
attributes #0 = { "target-features"="+mve" }