From 98867bf8de1ec093447f7efc490628fbeed3dfb3 Mon Sep 17 00:00:00 2001 From: Mary Kassayova Date: Thu, 21 Aug 2025 11:31:34 +0100 Subject: [PATCH] [AArch64] [CostModel] Fix cost modelling for saturating arithmetic intrinsics (#152333) The cost model previously overestimating throughput costs to wide fixed-length saturating arithmetic intrinsics when using SVE with a fixed vscale of 2. These costs ended up much higher than for the same operations using NEON, despite being fully legal and efficient with SVE. This patch adjusts the cost model to avoid penalising these intrinsics under SVE. --- .../AArch64/AArch64TargetTransformInfo.cpp | 7 + .../Analysis/CostModel/AArch64/arith-ssat.ll | 331 +++++++++++++----- .../Analysis/CostModel/AArch64/arith-usat.ll | 327 ++++++++++++----- 3 files changed, 473 insertions(+), 192 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 911aaf6f8749..490f6391c15a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -643,6 +643,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4; if (any_of(ValidSatTys, [<](MVT M) { return M == LT.second; })) return LT.first * Instrs; + + TypeSize TS = getDataLayout().getTypeSizeInBits(RetTy); + uint64_t VectorSize = TS.getKnownMinValue(); + + if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize)) + return LT.first * Instrs; + break; } case Intrinsic::abs: { diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll index 254715ac909e..0d1f20bdf817 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=COMMON,BASE +; RUN: opt -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=COMMON,SVE target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -31,60 +32,128 @@ declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { -; CHECK-LABEL: 'add' -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef +; COMMON-LABEL: 'add' +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> poison, <3 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison ; - %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) - %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) - %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) - %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) + %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison) + %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) - %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) - %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) - %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) - %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) - %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) - %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef) + %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison) + %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) + %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> poison, <3 x i32> poison) - %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) - %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) - %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) - %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) - %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) - %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) + %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison) + %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) - %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) - %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) - %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) - %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) - %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) - %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) - %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) + %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison) + %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) - ret i32 undef + ret i32 poison +} + +define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) { +; BASE-LABEL: 'add_sve_vscale2' +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; +; SVE-LABEL: 'add_sve_vscale2' +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; + %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) + + %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) + + %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) + + %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) + + ret i32 poison } declare i64 @llvm.ssub.sat.i64(i64, i64) @@ -114,56 +183,124 @@ declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { -; CHECK-LABEL: 'sub' -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef +; COMMON-LABEL: 'sub' +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison ; - %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) - %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) - %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) - %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) + %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison) + %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) - %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) - %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) - %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) - %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) - %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) + %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison) + %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) - %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) - %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) - %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) - %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) - %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) - %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) + %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison) + %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) - %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) - %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) - %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) - %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) - %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) - %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) - %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) + %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison) + %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) - ret i32 undef + ret i32 poison +} + +define i32 @sub_sve_vscale2(i32 %arg) vscale_range(2,2) { +; BASE-LABEL: 'sub_sve_vscale2' +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; +; SVE-LABEL: 'sub_sve_vscale2' +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; + %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) + + %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) + + %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) + + %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) + + ret i32 poison } diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll index dba42b1e6ebb..032a4480d11e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=COMMON,BASE +; RUN: opt -passes="print" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=COMMON,SVE target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -30,58 +31,126 @@ declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { -; CHECK-LABEL: 'add' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef +; COMMON-LABEL: 'add' +; COMMON-NEXT: Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 poison, i64 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 poison, i32 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 poison, i16 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 poison, i8 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison ; - %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) - %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) - %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) - %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) + %I64 = call i64 @llvm.uadd.sat.i64(i64 poison, i64 poison) + %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) - %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) - %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) - %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) - %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) - %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) + %I32 = call i32 @llvm.uadd.sat.i32(i32 poison, i32 poison) + %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) - %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) - %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) - %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) - %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) - %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) - %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) + %I16 = call i16 @llvm.uadd.sat.i16(i16 poison, i16 poison) + %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) - %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) - %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) - %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) - %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) - %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) - %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) - %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) + %I8 = call i8 @llvm.uadd.sat.i8(i8 poison, i8 poison) + %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) - ret i32 undef + ret i32 poison +} + +define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) { +; BASE-LABEL: 'add_sve_vscale2' +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; +; SVE-LABEL: 'add_sve_vscale2' +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; + %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison) + + %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison) + + %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison) + + %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison) + + ret i32 poison } declare i64 @llvm.usub.sat.i64(i64, i64) @@ -111,56 +180,124 @@ declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>) declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { -; CHECK-LABEL: 'sub' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef +; COMMON-LABEL: 'sub' +; COMMON-NEXT: Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.usub.sat.i64(i64 poison, i64 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.usub.sat.i32(i32 poison, i32 poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.usub.sat.i16(i16 poison, i16 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.usub.sat.i8(i8 poison, i8 poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; COMMON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison ; - %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) - %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) - %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) - %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) + %I64 = call i64 @llvm.usub.sat.i64(i64 poison, i64 poison) + %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) - %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) - %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) - %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) - %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) - %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) + %I32 = call i32 @llvm.usub.sat.i32(i32 poison, i32 poison) + %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) - %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) - %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) - %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) - %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) - %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) - %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) + %I16 = call i16 @llvm.usub.sat.i16(i16 poison, i16 poison) + %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) - %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) - %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) - %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) - %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) - %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) - %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) - %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) + %I8 = call i8 @llvm.usub.sat.i8(i8 poison, i8 poison) + %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) - ret i32 undef + ret i32 poison +} + +define i32 @sub_sve_vscale2(i32 %arg) vscale_range(2,2) { +; BASE-LABEL: 'sub_sve_vscale2' +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; +; SVE-LABEL: 'sub_sve_vscale2' +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) +; SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison +; + %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) + %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison) + %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison) + + %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison) + %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison) + %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison) + %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison) + + %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison) + %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison) + %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison) + %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison) + %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison) + + %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison) + %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison) + %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison) + %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison) + %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison) + %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison) + + ret i32 poison }