Philip Reames 2663d2cb9c
[RISCV] Adjust select shuffle cost to reflect mask creation cost (#77963)
This is inspired by
https://github.com/llvm/llvm-project/pull/77342#pullrequestreview-1814673242,
and is split off of same with some differences in style.

A select is a vmerge.vv with the additional cost of materializing the
bitmask vector in a vreg. All masks fit within a single vector register
(e8 + m8 is the worst case), and thus our worst case cost should be
roughly 3 (2 scalar to produce the address, one vector load op). Given
most shuffles are small, and the mask will be instead produced by
LUI/ADDI + vmv.s.x or ADDI + vmv.s.x, using 2 as the default seems quite
reasonable. At worst, we're not going to be off by much.

The prior lowering scaled the cost of the bitmask with LMUL, which I
don't understand. At m1 it did use the same base cost of 2. (@lukel97
You wrote the original code here, anything I'm missing here?)
2024-01-18 10:24:47 -08:00

63 lines
4.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV64
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v | FileCheck %s -check-prefixes=CHECK-SIZE,RV32-SIZE
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s -check-prefixes=CHECK-SIZE,RV64-SIZE
define <8 x i8> @select_start_v8i8(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: 'select_start_v8i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %res
;
; CHECK-SIZE-LABEL: 'select_start_v8i8'
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %res
;
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %res
}
define <8 x i8> @select_non_contiguous_v8i8(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: 'select_non_contiguous_v8i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %res
;
; CHECK-SIZE-LABEL: 'select_non_contiguous_v8i8'
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %res
;
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
ret <8 x i8> %res
}
define <8 x i64> @select_start_v8i64(<8 x i64> %v, <8 x i64> %w) {
; CHECK-LABEL: 'select_start_v8i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %res
;
; CHECK-SIZE-LABEL: 'select_start_v8i64'
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res
;
%res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %res
}
define <8 x i64> @select_non_contiguous_v8i64(<8 x i64> %v, <8 x i64> %w) {
; CHECK-LABEL: 'select_non_contiguous_v8i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %res
;
; CHECK-SIZE-LABEL: 'select_non_contiguous_v8i64'
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res
;
%res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
ret <8 x i64> %res
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV32-SIZE: {{.*}}
; RV64: {{.*}}
; RV64-SIZE: {{.*}}