This patch implements `getAddressComputationCost()` in the RISC-V TTI, which makes a gather/scatter with address calculation more expensive than the stride cost. Note that the only user of `getAddressComputationCost()` with a vector type is `VPWidenMemoryRecipe::computeCost()`, so this patch changes some LV tests. I've checked the LV test changes, and they seem to fall into two groups: * gather/scatter with a uniform vector pointer, which it seems could be optimized to a masked.load; * cases that could be optimized to a strided load/store. ---- After #155739 landed, the assertion (cost misalignment) is fixed. I've tested llvm-test-suite with rva23u64 and rva23u64_zvl1024b locally, and no assertion occurred.
65 lines
2.4 KiB
LLVM
65 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v < %s -S | FileCheck %s

; Make sure we don't duplicate the safe divisor cost in the VPlan cost model.

; Input is a scalar loop over %iv (start 1, step 7) with one conditionally
; executed block (%then) between the header and the latch. All four pointer
; arguments are noalias. The autogenerated CHECK lines below expect the same
; five-block shape (entry, loop, then, latch, exit) in the output.
define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) {
; CHECK-LABEL: define void @pr154103(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[X]] to i64
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 0, [[CONV]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[DIV]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[THEN:.*]], label %[[LATCH]]
; CHECK: [[THEN]]:
; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[Y]] to i64
; CHECK-NEXT: [[NOT:%.*]] = xor i64 [[ZEXT]], 0
; CHECK-NEXT: br label %[[LATCH]]
; CHECK: [[LATCH]]:
; CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[NOT]], %[[THEN]] ], [ 0, %[[LOOP]] ]
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[COND]] to i16
; CHECK-NEXT: store i16 [[TRUNC]], ptr [[C]], align 2
; CHECK-NEXT: store i32 0, ptr [[D]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 7
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 1, %entry ], [ %iv.next, %latch ]
  ; Byte load from %a at offset %iv; this value becomes the divisor below.
  %gep = getelementptr i8, ptr %a, i64 %iv
  %x = load i8, ptr %gep, align 1
  %conv = zext i8 %x to i64
  %div = sdiv i64 0, %conv
  ; %then only runs when the quotient is strictly positive.
  %cmp = icmp sgt i64 %div, 0
  br i1 %cmp, label %then, label %latch

then:
  ; The address %b does not depend on %iv.
  %y = load i8, ptr %b
  %zext = zext i8 %y to i64
  %not = xor i64 %zext, 0
  br label %latch

latch:
  ; %cond is 0 when %then was skipped.
  %cond = phi i64 [ %not, %then ], [ 0, %loop ]
  %trunc = trunc i64 %cond to i16
  ; Both stores target addresses that do not depend on %iv.
  store i16 %trunc, ptr %c
  store i32 0, ptr %d
  %iv.next = add i64 %iv, 7
  ; The exit test compares the pre-increment %iv, not %iv.next.
  %done = icmp eq i64 %iv, 0
  br i1 %done, label %exit, label %loop

exit:
  ret void
}