//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "X86LegalizerInfo.h"
|
|
#include "X86Subtarget.h"
|
|
#include "X86TargetMachine.h"
|
|
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
|
|
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
|
|
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
|
#include "llvm/CodeGen/TargetOpcodes.h"
|
|
#include "llvm/CodeGen/ValueTypes.h"
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
#include "llvm/IR/Type.h"
|
|
|
|
using namespace llvm;
using namespace TargetOpcode;
using namespace LegalizeActions;
using namespace LegalityPredicates;

X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                                   const X86TargetMachine &TM)
    : Subtarget(STI) {

  bool Is64Bit = Subtarget.is64Bit();
  bool HasCMOV = Subtarget.canUseCMOV();
  bool HasSSE1 = Subtarget.hasSSE1();
  bool HasSSE2 = Subtarget.hasSSE2();
  bool HasSSE41 = Subtarget.hasSSE41();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX2 = Subtarget.hasAVX2();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool HasVLX = Subtarget.hasVLX();
  bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
  bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  bool HasPOPCNT = Subtarget.hasPOPCNT();
  bool HasLZCNT = Subtarget.hasLZCNT();
  bool HasBMI = Subtarget.hasBMI();

  const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s80 = LLT::scalar(80);
  const LLT s128 = LLT::scalar(128);
  const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s8 = LLT::fixed_vector(4, 8);

  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT v32s8 = LLT::fixed_vector(32, 8);
  const LLT v16s16 = LLT::fixed_vector(16, 16);
  const LLT v8s32 = LLT::fixed_vector(8, 32);
  const LLT v4s64 = LLT::fixed_vector(4, 64);
  const LLT v4p0 = LLT::fixed_vector(4, p0);

  const LLT v64s8 = LLT::fixed_vector(64, 8);
  const LLT v32s16 = LLT::fixed_vector(32, 16);
  const LLT v16s32 = LLT::fixed_vector(16, 32);
  const LLT v8s64 = LLT::fixed_vector(8, 64);

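  // Widest legal vector type per element size, given the widest vector
  // register the subtarget offers: 128-bit XMM (SSE), 256-bit YMM (AVX), or
  // 512-bit ZMM (AVX-512).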
  const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
  const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
  const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
  const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;

  // todo: AVX512 bool vector predicate types

  // implicit/constants
  // 32/64-bit mode needs support for s64/s128 to handle cases:
  // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
  // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64})
      .legalFor(Is64Bit, {s128});

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getActionDefinitionsBuilder(
      {G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH,
       G_FASIN, G_FTAN, G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP,
       G_FEXP2, G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS})
      .libcall();

  getActionDefinitionsBuilder(G_FSQRT)
      .legalFor(HasSSE1 || UseX87, {s32})
      .legalFor(HasSSE2 || UseX87, {s64})
      .legalFor(UseX87, {s80});

  // merge/unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, /*Min=*/8)
        .widenScalarToNextPow2(BigTyIdx, /*Min=*/16)
        .minScalar(LitTyIdx, s8)
        .minScalar(BigTyIdx, s32)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
          case 512:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
            return true;
          default:
            return false;
          }
        });
  }

  // integer addition/subtraction
  getActionDefinitionsBuilder({G_ADD, G_SUB})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX2, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(HasBWI, {v64s8, v32s16})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16))
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO})
      .legalFor({{s8, s1}, {s16, s1}, {s32, s1}})
      .legalFor(Is64Bit, {{s64, s1}})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s1, s1)
      .scalarize(0);

  // integer multiply
  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v8s16})
      .legalFor(HasSSE41, {v4s32})
      .legalFor(HasAVX2, {v16s16, v8s32})
      .legalFor(HasAVX512, {v16s32})
      .legalFor(HasDQI, {v8s64})
      .legalFor(HasDQI && HasVLX, {v2s64, v4s64})
      .legalFor(HasBWI, {v32s16})
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, HasVLX ? 2 : 8)
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, 8)
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer divisions
  getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .libcallFor({s64})
      .clampScalar(0, s8, sMaxScalar);

  // integer shifts
  getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
      .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
      .legalFor(Is64Bit, {{s64, s8}})
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s8, s8);

  // integer logic
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer comparison
  const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
  const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32)
      .clampScalar(0, s8, s8)
      .clampScalar(1, s8, sMaxScalar);

  // bswap
  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s32, sMaxScalar);

  // popcount
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasPOPCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasPOPCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count leading zeros (LZCNT)
  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor(HasLZCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasLZCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count trailing zeros
  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
      .legalFor({{s16, s16}, {s32, s32}})
      .legalFor(Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_CTTZ)
      .legalFor(HasBMI, {{s16, s16}, {s32, s32}})
      .legalFor(HasBMI && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // control flow
  getActionDefinitionsBuilder(G_PHI)
      .legalFor({s8, s16, s32, p0})
      .legalFor(UseX87, {s80})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE1, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});

  // pointer handling
  const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
  const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0})
      .maxScalar(0, sMaxScalar)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}});

  getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0});

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s32}})
      .legalFor(Is64Bit, {{p0, s64}})
      .widenScalarToNextPow2(1, /*Min*/ 32)
      .clampScalar(1, s32, sMaxScalar);

  getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});

  // load/store: add more corner cases
  for (unsigned Op : {G_LOAD, G_STORE}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
                                     {s16, p0, s16, 1},
                                     {s32, p0, s32, 1},
                                     {s80, p0, s80, 1},
                                     {p0, p0, p0, 1},
                                     {v4s8, p0, v4s8, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});

    if (HasSSE1)
      Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}});
    if (HasSSE2)
      Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1},
                                       {v8s16, p0, v8s16, 1},
                                       {v2s64, p0, v2s64, 1},
                                       {v2p0, p0, v2p0, 1}});
    if (HasAVX)
      Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1},
                                       {v16s16, p0, v16s16, 1},
                                       {v8s32, p0, v8s32, 1},
                                       {v4s64, p0, v4s64, 1},
                                       {v4p0, p0, v4p0, 1}});
    if (HasAVX512)
      Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1},
                                       {v32s16, p0, v32s16, 1},
                                       {v16s32, p0, v16s32, 1},
                                       {v8s64, p0, v8s64, 1}});

    // X86 supports extending loads but not stores for GPRs
    if (Op == G_LOAD) {
      Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
                                       {s16, p0, s8, 1},
                                       {s32, p0, s8, 1},
                                       {s32, p0, s16, 1}});
      if (Is64Bit)
        Action.legalForTypesWithMemDesc(
            {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    } else {
      Action.customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      });
    }
    Action.widenScalarToNextPow2(0, /*Min=*/8)
        .clampScalar(0, s8, sMaxScalar)
        .scalarize(0);
  }

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc(
        {{s16, p0, s8, 1}, {s32, p0, s8, 1}, {s32, p0, s16, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
  }

  // sext, zext, and anyext
  getActionDefinitionsBuilder(G_ANYEXT)
      .legalFor({s8, s16, s32, s128})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // fp constants
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .legalFor(UseX87, {s80});

  // fp arithmetic
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
      .legalFor({s32, s64})
      .legalFor(HasSSE1, {v4s32})
      .legalFor(HasSSE2, {v2s64})
      .legalFor(HasAVX, {v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(UseX87, {s80});

  // fp comparison
  getActionDefinitionsBuilder(G_FCMP)
      .legalFor(HasSSE1 || UseX87, {s8, s32})
      .legalFor(HasSSE2 || UseX87, {s8, s64})
      .legalFor(UseX87, {s8, s80})
      .clampScalar(0, s8, s8)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // fp conversions
  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasAVX, {{v4s64, v4s32}})
      .legalFor(HasAVX512, {{v8s64, v8s32}});

  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasAVX, {{v4s32, v4s64}})
      .legalFor(HasAVX512, {{v8s32, v8s64}});

  getActionDefinitionsBuilder(G_SITOFP)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s32, s64}})
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(1)
      .customForCartesianProduct(UseX87, {s32, s64, s80}, {s16, s32, s64})
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_FPTOSI)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s64, s32}})
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .customForCartesianProduct(UseX87, {s16, s32, s64}, {s32, s64, s80})
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
  // <= s32 manually. Otherwise, in the custom handler there is no way to tell
  // whether s32 is the original type (and needs to be promoted to s64) or was
  // obtained by widening (and shouldn't be widened to s64 again).
  //
  // For AVX512 we simply widen types as there is direct mapping from opcodes
  // to asm instructions.
  getActionDefinitionsBuilder(G_UITOFP)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
      })
      .lowerIf([=](const LegalityQuery &Query) {
        // Lower conversions from s64
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               (Is64Bit && typeIs(1, s64)(Query));
      })
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sMaxScalar)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_FPTOUI)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
      })
      // TODO: replace with customized legalization using
      // specifics of cvttsd2si. The selection of this node requires
      // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
      // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
      .lowerIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               (Is64Bit && typeIs(0, s64)(Query));
      })
      .clampScalar(0, s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // vector ops
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .customIf([=](const LegalityQuery &Query) {
        return (HasSSE1 && typeInSet(0, {v4s32})(Query)) ||
               (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) ||
               (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) ||
               (HasAVX512 &&
                typeInSet(0, {v8s64, v16s32, v32s16, v64s8})(Query));
      })
      .clampNumElements(0, v16s8, s8MaxVector)
      .clampNumElements(0, v8s16, s16MaxVector)
      .clampNumElements(0, v4s32, s32MaxVector)
      .clampNumElements(0, v2s64, s64MaxVector)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
      .legalIf([=](const LegalityQuery &Query) {
        unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
        unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
        return (HasAVX && typePairInSet(SubIdx, FullIdx,
                                        {{v16s8, v32s8},
                                         {v8s16, v16s16},
                                         {v4s32, v8s32},
                                         {v2s64, v4s64}})(Query)) ||
               (HasAVX512 && typePairInSet(SubIdx, FullIdx,
                                           {{v16s8, v64s8},
                                            {v32s8, v64s8},
                                            {v8s16, v32s16},
                                            {v16s16, v32s16},
                                            {v4s32, v16s32},
                                            {v8s32, v16s32},
                                            {v2s64, v8s64},
                                            {v4s64, v8s64}})(Query));
      });

  // todo: only permit dst types up to max legal vector register size?
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor(
          HasSSE1,
          {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
      .legalFor(HasAVX, {{v64s8, v16s8},
                         {v64s8, v32s8},
                         {v32s16, v8s16},
                         {v32s16, v16s16},
                         {v16s32, v4s32},
                         {v16s32, v8s32},
                         {v8s64, v2s64},
                         {v8s64, v4s64}});

  // todo: vectors and address spaces
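  // Note: sub-minimum result types are widened to at least s16 when CMOV is
  // available (CMOVcc has no 8-bit register form) and to s8 otherwise.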
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
      .clampScalar(1, s32, s32);

  // memory intrinsics
  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
      .lower();

  // fp intrinsics
  getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
      .scalarize(0)
      .minScalar(0, LLT::scalar(32))
      .libcall();

  getActionDefinitionsBuilder({G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({s8, s16, s32, s64, p0})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getLegacyLegalizerInfo().computeTables();
  verify(*STI.getInstrInfo());
}

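// Dispatch instructions marked Custom by the rules above to the matching
// per-opcode helper below.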
bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                      LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_BUILD_VECTOR:
    return legalizeBuildVector(MI, MRI, Helper);
  case TargetOpcode::G_FPTOUI:
    return legalizeFPTOUI(MI, MRI, Helper);
  case TargetOpcode::G_UITOFP:
    return legalizeUITOFP(MI, MRI, Helper);
  case TargetOpcode::G_STORE:
    return legalizeNarrowingStore(MI, MRI, Helper);
  case TargetOpcode::G_SITOFP:
    return legalizeSITOFP(MI, MRI, Helper);
  case TargetOpcode::G_FPTOSI:
    return legalizeFPTOSI(MI, MRI, Helper);
  }
  llvm_unreachable("expected switch to return");
}

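// X87 lowering of G_SITOFP: spill the integer to a stack slot and convert it
// with G_FILD, which loads from memory onto the FPU stack.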
bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = *MI.getMF();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
          SrcTy.getSizeInBits() == 64) &&
         "Unexpected source type for SITOFP in X87 mode.");

  TypeSize MemSize = SrcTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignmt = Helper.getStackTemporaryAlignment(SrcTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignmt, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  // Spill the integer value to the stack slot; FILD loads and converts it.
  MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO);

  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize));
  MIRBuilder.buildInstr(X86::G_FILD)
      .addDef(Dst)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(LoadMMO);

  MI.eraseFromParent();
  return true;
}

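// X87 lowering of G_FPTOSI: G_FIST stores the converted integer to a stack
// slot, which is then reloaded as the result.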
bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineFunction &MF = *MI.getMF();
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  TypeSize MemSize = DstTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignmt = Helper.getStackTemporaryAlignment(DstTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignmt, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  MIRBuilder.buildInstr(X86::G_FIST)
      .addUse(Src)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(StoreMMO);

  MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize));
  MI.eraseFromParent();
  return true;
}

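// Custom legalization of G_BUILD_VECTOR: when every source is a constant (or
// undef), materialize the whole vector as a constant-pool load.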
bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  const auto &BuildVector = cast<GBuildVector>(MI);
  Register Dst = BuildVector.getReg(0);
  LLT DstTy = MRI.getType(Dst);
  MachineFunction &MF = MIRBuilder.getMF();
  LLVMContext &Ctx = MF.getFunction().getContext();
  uint64_t DstTySize = DstTy.getScalarSizeInBits();

  SmallVector<Constant *, 4> CstIdxs;
  for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
    Register Source = BuildVector.getSourceReg(i);

    auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI);
    if (ValueAndReg) {
      CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value));
      continue;
    }

    auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI);
    if (FPValueAndReg) {
      CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value));
      continue;
    }

    if (getOpcodeDef<GImplicitDef>(Source, MRI)) {
      CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize)));
      continue;
    }
    return false;
  }

  Constant *ConstVal = ConstantVector::get(CstIdxs);

  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
  auto Addr = MIRBuilder.buildConstantPool(
      LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)),
      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                              MachineMemOperand::MOLoad, DstTy, Alignment);

  MIRBuilder.buildLoad(Dst, Addr, *MMO);
  MI.eraseFromParent();
  return true;
}

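// Custom legalization of G_FPTOUI for narrow results: widen, convert with
// G_FPTOSI, then truncate back to the requested width.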
bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse FPTOSI when it is possible to widen the type
  if (DstSizeInBits <= 32) {
    auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildTrunc(Dst, Casted);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

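// Custom legalization of G_UITOFP for narrow sources: zero-extend and convert
// with G_SITOFP, which is exact since the widened value is non-negative.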
bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse SITOFP when it is possible to widen the type
  if (SrcTy.getSizeInBits() <= 32) {
    auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildSITOFP(Dst, Ext);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

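// Custom legalization of truncating stores: rebuild the memory operand with
// the value register's type so the store is no longer narrowing.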
bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              LegalizerHelper &Helper) const {
  auto &Store = cast<GStore>(MI);
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineMemOperand &MMO = **Store.memoperands_begin();
  MachineFunction &MF = MIRBuilder.getMF();
  LLT ValTy = MRI.getType(Store.getValueReg());
  auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), ValTy);

  Helper.Observer.changingInstr(Store);
  Store.setMemRefs(MF, {NewMMO});
  Helper.Observer.changedInstr(Store);
  return true;
}

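// Intrinsic legalization hook; no X86 intrinsics currently need extra
// handling here.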
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  return true;
}