Luke Lau 3a26feb607
[RISCV] Lower fixed-length mgather/mscatter for zvfhmin/zvfbfmin (#114945)
In preparation for allowing zvfhmin and zvfbfmin in
isLegalElementTypeForRVV, this lowers fixed-length masked gathers and
scatters

We need to mark f16 and bf16 as legal in isLegalMaskedGatherScatter
otherwise ScalarizeMaskedMemIntrin will just scalarize them, but we can
move this back into isLegalElementTypeForRVV afterwards.

The scalarized codegen required #114938, #114927 and #114915 to not
crash.
2024-11-06 10:33:06 +08:00

294 lines
42 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
define void @masked_scatter_aligned() {
; GENERIC-LABEL: 'masked_scatter_aligned'
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; MAX256-LABEL: 'masked_scatter_aligned'
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; UNSUPPORTED-LABEL: 'masked_scatter_aligned'
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
ret void
}
define void @masked_scatter_aligned_f16() {
; GENERIC-LABEL: 'masked_scatter_aligned_f16'
; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; MAX256-LABEL: 'masked_scatter_aligned_f16'
; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; UNSUPPORTED-LABEL: 'masked_scatter_aligned_f16'
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
ret void
}
define void @masked_scatter_unaligned() {
; CHECK-LABEL: 'masked_scatter_unaligned'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
ret void
}
define void @masked_scatter_unaligned_f16() {
; CHECK-LABEL: 'masked_scatter_unaligned_f16'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
ret void
}