llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
David Green be7a107070 [ARM] Teach the Arm cost model that a Shift can be folded into other instructions
This attempts to teach the cost model in Arm that code such as:
  %s = shl i32 %a, 3
  %a = and i32 %s, %b
Can under Arm or Thumb2 become:
  and r0, r1, r2, lsl #3

So the cost of the shift can essentially be free. To do this without
trying to artificially adjust the cost of the "and" instruction, it
needs to get the users of the shl and check if they are a type of
instruction that the shift can be folded into. And so it needs to have
access to the actual instruction in getArithmeticInstrCost, which if
available is added as an extra parameter much like getCastInstrCost.

We otherwise limit it to shifts with a single user, which should
hopefully handle most of the cases. The list of instruction that the
shift can be folded into include ADC, ADD, AND, BIC, CMP, EOR, MVN, ORR,
ORN, RSB, SBC and SUB. This translates to Add, Sub, And, Or, Xor and
ICmp.

Differential Revision: https://reviews.llvm.org/D70966
2019-12-09 10:24:33 +00:00

85 lines
2.8 KiB
C++

//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//
#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "wasmtti"
TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
return TargetTransformInfo::PSK_FastHardware;
}
unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
unsigned Result = BaseT::getNumberOfRegisters(ClassID);
// For SIMD, use at least 16 registers, as a rough guess.
bool Vector = (ClassID == 1);
if (Vector)
Result = std::max(Result, 16u);
return Result;
}
unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector && getST()->hasSIMD128())
return 128;
return 64;
}
unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
switch (Opcode) {
case Instruction::LShr:
case Instruction::AShr:
case Instruction::Shl:
// SIMD128's shifts currently only accept a scalar shift count. For each
// element, we'll need to extract, op, insert. The following is a rough
// approxmation.
if (Opd2Info != TTI::OK_UniformValue &&
Opd2Info != TTI::OK_UniformConstantValue)
Cost = VTy->getNumElements() *
(TargetTransformInfo::TCC_Basic +
getArithmeticInstrCost(Opcode, VTy->getElementType()) +
TargetTransformInfo::TCC_Basic);
break;
}
}
return Cost;
}
unsigned WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) {
unsigned Cost = BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
// SIMD128's insert/extract currently only take constant indices.
if (Index == -1u)
return Cost + 25 * TargetTransformInfo::TCC_Expensive;
return Cost;
}