[X86][MC][CodeGen] Support EGPR for KMOV (#73781)
KMOV is essential for copying between k-registers and GPRs. R16-R31 were added to the GPRs in #70958, so we extend KMOV to these new registers first. This patch:

1. Promotes the KMOV instructions from VEX space to EVEX space
2. Emits the {evex} prefix for the EVEX variants
3. Prefers the EVEX variants over the VEX variants in ISEL and optimizations, for better RA

The EVEX variants are compressed back to VEX variants by the existing EVEX2VEX pass when no EGPR is used.

RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4

Note: llvm-test-suite and CPU2017 can be built successfully with the egpr feature.
parent 5891a8f7ce
commit 511ba45a47
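A note on the shape of the change before the diff: nearly every C++ hunk below replaces a hard-coded VEX-space KMOV opcode with a subtarget-dependent choice. A minimal self-contained C++ sketch of that idiom (the enum and function here are illustrative, not LLVM's API):

#include <cstdio>

// Illustrative model of the opcode-selection idiom used throughout this
// patch; only the decision logic mirrors the real code.
enum Opcode { KMOVWkm, KMOVWkm_EVEX };

Opcode selectKMOVWLoad(bool HasEGPR) {
  // Prefer the EVEX variant whenever EGPR is available so the register
  // allocator may use R16-R31; the existing EVEX2VEX pass compresses the
  // instruction back to the VEX form when no extended register is used.
  return HasEGPR ? KMOVWkm_EVEX : KMOVWkm;
}

int main() {
  printf("%s\n", selectKMOVWLoad(true) == KMOVWkm_EVEX ? "EVEX" : "VEX");
}

The same ternary appears in the domain reassignment, pseudo expansion, copy lowering, and spill/reload hunks below.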
@@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
     O << "\t{vex2}";
   else if (Flags & X86::IP_USE_VEX3)
     O << "\t{vex3}";
-  else if (Flags & X86::IP_USE_EVEX)
+  else if ((Flags & X86::IP_USE_EVEX) ||
+           (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
     O << "\t{evex}";
 
   if (Flags & X86::IP_USE_DISP8)
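A condensed, self-contained model of the rule this hunk adds (C++; the flag constants are placeholders, the real masks live in X86BaseInfo.h): the {evex} pseudo prefix is printed either when the instruction flag requests it or when the instruction's TSFlags mark EVEX as an explicit prefix, which keeps the new KMOV*_EVEX variants distinguishable from their VEX twins in assembly output.

#include <cstdint>
#include <cstdio>

// Sketch only: the constants are invented, not LLVM's actual bit layout.
constexpr uint32_t IP_USE_EVEX = 1u << 2;
constexpr uint64_t ExplicitOpPrefixMask = 3ull << 8;
constexpr uint64_t ExplicitEVEXPrefix = 2ull << 8;

void printPrefix(uint32_t Flags, uint64_t TSFlags) {
  if ((Flags & IP_USE_EVEX) ||
      (TSFlags & ExplicitOpPrefixMask) == ExplicitEVEXPrefix)
    printf("\t{evex}");
}

int main() {
  printPrefix(0, ExplicitEVEXPrefix); // prints "\t{evex}"
}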
@@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
         std::make_unique<InstrReplacerDstCOPY>(From, To);
   };
 
-  createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
-  createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
+  bool HasEGPR = STI->hasEGPR();
+  createReplacerDstCOPY(X86::MOVZX32rm16,
+                        HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+  createReplacerDstCOPY(X86::MOVZX64rm16,
+                        HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
 
   createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
   createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
 
   if (STI->hasDQI()) {
-    createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
-    createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
-    createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
+    createReplacerDstCOPY(X86::MOVZX16rm8,
+                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+    createReplacerDstCOPY(X86::MOVZX32rm8,
+                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+    createReplacerDstCOPY(X86::MOVZX64rm8,
+                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
 
     createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
     createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
@@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
     Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
   };
 
-  createReplacer(X86::MOV16rm, X86::KMOVWkm);
-  createReplacer(X86::MOV16mr, X86::KMOVWmk);
+  createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+  createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
   createReplacer(X86::MOV16rr, X86::KMOVWkk);
   createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
   createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
@@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
   createReplacer(X86::XOR16rr, X86::KXORWrr);
 
   if (STI->hasBWI()) {
-    createReplacer(X86::MOV32rm, X86::KMOVDkm);
-    createReplacer(X86::MOV64rm, X86::KMOVQkm);
+    createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
+    createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
 
-    createReplacer(X86::MOV32mr, X86::KMOVDmk);
-    createReplacer(X86::MOV64mr, X86::KMOVQmk);
+    createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
+    createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
 
     createReplacer(X86::MOV32rr, X86::KMOVDkk);
     createReplacer(X86::MOV64rr, X86::KMOVQkk);
@@ -695,8 +701,8 @@ void X86DomainReassignment::initConverters() {
 
     createReplacer(X86::AND8rr, X86::KANDBrr);
 
-    createReplacer(X86::MOV8rm, X86::KMOVBkm);
-    createReplacer(X86::MOV8mr, X86::KMOVBmk);
+    createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+    createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
     createReplacer(X86::MOV8rr, X86::KMOVBkk);
 
     createReplacer(X86::NOT8r, X86::KNOTBrr);
@@ -264,6 +264,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   const DebugLoc &DL = MBBI->getDebugLoc();
+  bool HasEGPR = STI->hasEGPR();
   switch (Opcode) {
   default:
     return false;
@@ -466,10 +467,14 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
     Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
 
-    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
-                     .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
-    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
-                     .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+    auto MIBLo =
+        BuildMI(MBB, MBBI, DL,
+                TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+    auto MIBHi =
+        BuildMI(MBB, MBBI, DL,
+                TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
 
     for (int i = 0; i < X86::AddrNumOperands; ++i) {
       MIBLo.add(MBBI->getOperand(1 + i));
@@ -500,8 +505,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
     Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
 
-    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
-    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+    auto MIBLo = BuildMI(MBB, MBBI, DL,
+                         TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
+    auto MIBHi = BuildMI(MBB, MBBI, DL,
+                         TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
 
     for (int i = 0; i < X86::AddrNumOperands; ++i) {
       MIBLo.add(MBBI->getOperand(i));
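The two X86ExpandPseudo hunks above split a mask-pair load/store pseudo into two KMOVW instructions, one per sub-mask register. A rough self-contained C++ model of the expansion (the struct and names are invented for the sketch; this is not LLVM code):

#include <cstdio>

// Each half of the pair independently picks the EVEX or VEX opcode, so the
// whole expansion can use EGPR-capable addressing when available.
enum Opcode { KMOVWkm, KMOVWkm_EVEX };

struct Expanded {
  Opcode Lo, Hi; // loads for sub_mask_0 and sub_mask_1
};

Expanded expandMaskPairLoad(bool HasEGPR) {
  Opcode Op = HasEGPR ? KMOVWkm_EVEX : KMOVWkm;
  return {Op, Op}; // both halves use the same encoding space
}

int main() {
  Expanded E = expandMaskPairLoad(true);
  printf("%d %d\n", E.Lo, E.Hi); // 1 1 -> both EVEX
}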
@@ -2853,46 +2853,56 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
 // - copy from GPR to mask register and vice versa
 //
 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
-                           string OpcodeStr, RegisterClass KRC,
-                           ValueType vvt, X86MemOperand x86memop> {
+                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
+                           X86MemOperand x86memop, string Suffix = ""> {
+  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
-             Sched<[WriteMove]>;
-  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-             [(set KRC:$dst, (vvt (load addr:$src)))]>,
-             Sched<[WriteLoad]>;
-  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-             [(store KRC:$src, addr:$dst)]>,
-             Sched<[WriteStore]>;
-}
-
-multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
-                               string OpcodeStr,
-                               RegisterClass KRC, RegisterClass GRC> {
-  let hasSideEffects = 0 in {
-    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
-               Sched<[WriteMove]>;
-    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
-               Sched<[WriteMove]>;
+  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+                    Sched<[WriteMove]>;
+  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
+                    Sched<[WriteLoad]>;
+  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(store KRC:$src, addr:$dst)]>,
+                    Sched<[WriteStore]>;
   }
 }
 
-let Predicates = [HasDQI] in
+multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
+                               string OpcodeStr, RegisterClass KRC,
+                               RegisterClass GRC, string Suffix = ""> {
+  let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
+    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+                      Sched<[WriteMove]>;
+    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+                      Sched<[WriteMove]>;
+  }
+}
+
+let Predicates = [HasDQI, NoEGPR] in
 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
              avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
              VEX, PD;
+let Predicates = [HasDQI, HasEGPR, In64BitMode] in
+defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
+             avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
+             EVEX, PD;
 
-let Predicates = [HasAVX512] in
+let Predicates = [HasAVX512, NoEGPR] in
 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
              avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
              VEX, PS;
+let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
+defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
+             avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
+             EVEX, PS;
 
-let Predicates = [HasBWI] in {
+let Predicates = [HasBWI, NoEGPR] in {
 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
              VEX, PD, REX_W;
 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
@@ -2902,6 +2912,16 @@ let Predicates = [HasBWI] in {
 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
              VEX, XD, REX_W;
 }
+let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
+defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
+             EVEX, PD, REX_W;
+defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
+             EVEX, XD;
+defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
+             EVEX, PS, REX_W;
+defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
+             EVEX, XD, REX_W;
+}
 
 // GR from/to mask register
 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
@@ -495,10 +495,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
     return false;
   case X86::MOV8rm:
   case X86::KMOVBkm:
+  case X86::KMOVBkm_EVEX:
     MemBytes = 1;
     return true;
   case X86::MOV16rm:
   case X86::KMOVWkm:
+  case X86::KMOVWkm_EVEX:
   case X86::VMOVSHZrm:
   case X86::VMOVSHZrm_alt:
     MemBytes = 2;
@@ -511,6 +513,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
   case X86::VMOVSSZrm:
   case X86::VMOVSSZrm_alt:
   case X86::KMOVDkm:
+  case X86::KMOVDkm_EVEX:
     MemBytes = 4;
     return true;
   case X86::MOV64rm:
@@ -524,6 +527,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
   case X86::KMOVQkm:
+  case X86::KMOVQkm_EVEX:
     MemBytes = 8;
     return true;
   case X86::MOVAPSrm:
@@ -593,10 +597,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
     return false;
   case X86::MOV8mr:
   case X86::KMOVBmk:
+  case X86::KMOVBmk_EVEX:
     MemBytes = 1;
     return true;
   case X86::MOV16mr:
   case X86::KMOVWmk:
+  case X86::KMOVWmk_EVEX:
   case X86::VMOVSHZmr:
     MemBytes = 2;
     return true;
@@ -605,6 +611,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
   case X86::VMOVSSmr:
   case X86::VMOVSSZmr:
   case X86::KMOVDmk:
+  case X86::KMOVDmk_EVEX:
     MemBytes = 4;
     return true;
   case X86::MOV64mr:
@@ -616,6 +623,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
   case X86::MMX_MOVQ64mr:
   case X86::MMX_MOVNTQmr:
   case X86::KMOVQmk:
+  case X86::KMOVQmk_EVEX:
     MemBytes = 8;
     return true;
   case X86::MOVAPSmr:
@@ -3519,6 +3527,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
                                         const X86Subtarget &Subtarget) {
   bool HasAVX = Subtarget.hasAVX();
   bool HasAVX512 = Subtarget.hasAVX512();
+  bool HasEGPR = Subtarget.hasEGPR();
 
   // SrcReg(MaskReg) -> DestReg(GR64)
   // SrcReg(MaskReg) -> DestReg(GR32)
@@ -3527,10 +3536,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   if (X86::VK16RegClass.contains(SrcReg)) {
     if (X86::GR64RegClass.contains(DestReg)) {
       assert(Subtarget.hasBWI());
-      return X86::KMOVQrk;
+      return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
     }
     if (X86::GR32RegClass.contains(DestReg))
-      return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
+      return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
+                                : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
   }
 
   // SrcReg(GR64) -> DestReg(MaskReg)
@@ -3540,10 +3550,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   if (X86::VK16RegClass.contains(DestReg)) {
     if (X86::GR64RegClass.contains(SrcReg)) {
       assert(Subtarget.hasBWI());
-      return X86::KMOVQkr;
+      return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
     }
     if (X86::GR32RegClass.contains(SrcReg))
-      return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
+      return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
+                                : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
   }
 
@@ -3710,6 +3721,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
   bool HasAVX = STI.hasAVX();
   bool HasAVX512 = STI.hasAVX512();
   bool HasVLX = STI.hasVLX();
+  bool HasEGPR = STI.hasEGPR();
 
   assert(RC != nullptr && "Invalid target register class");
   switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
@@ -3725,7 +3737,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
     return Load ? X86::MOV8rm : X86::MOV8mr;
   case 2:
     if (X86::VK16RegClass.hasSubClassEq(RC))
-      return Load ? X86::KMOVWkm : X86::KMOVWmk;
+      return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
+                  : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
     assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
     return Load ? X86::MOV16rm : X86::MOV16mr;
   case 4:
@@ -3743,7 +3756,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
       return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
     if (X86::VK32RegClass.hasSubClassEq(RC)) {
       assert(STI.hasBWI() && "KMOVD requires BWI");
-      return Load ? X86::KMOVDkm : X86::KMOVDmk;
+      return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
+                  : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
     }
     // All of these mask pair classes have the same spill size, the same kind
     // of kmov instructions can be used with all of them.
@@ -3774,7 +3788,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
       return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
     if (X86::VK64RegClass.hasSubClassEq(RC)) {
       assert(STI.hasBWI() && "KMOVQ requires BWI");
-      return Load ? X86::KMOVQkm : X86::KMOVQmk;
+      return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
+                  : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
     }
     llvm_unreachable("Unknown 8-byte regclass");
   case 10:
@@ -7717,9 +7732,13 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
     case X86::VMOVDQA64Zrm:
     case X86::VMOVDQU64Zrm:
     case X86::KMOVBkm:
+    case X86::KMOVBkm_EVEX:
     case X86::KMOVWkm:
+    case X86::KMOVWkm_EVEX:
     case X86::KMOVDkm:
+    case X86::KMOVDkm_EVEX:
     case X86::KMOVQkm:
+    case X86::KMOVQkm_EVEX:
       return true;
     }
   };
@@ -878,9 +878,10 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
 // X86 Instruction Predicate Definitions.
 def TruePredicate : Predicate<"true">;
 
+def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
 def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
 def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
-
 def HasNOPL : Predicate<"Subtarget->hasNOPL()">;
 def HasMMX : Predicate<"Subtarget->hasMMX()">;
 def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">;
@@ -0,0 +1,14 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s

define void @kmov(i1 %cmp23.not) {
; CHECK-LABEL: kmov:
; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
;
; EGPR-LABEL: kmov:
; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
entry:
  %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
  store double %0, ptr null, align 8
  ret void
}
llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll (new file, 58 lines)
@@ -0,0 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-domain-reassignment -mattr=+avx512f,+avx512bw,+avx512dq,+egpr | FileCheck %s

define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float %f2, float %f3, float %f4) {
; CHECK-LABEL: name: test_fcmp_storei1
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $edi, $rdx, $xmm0, $xmm1, $xmm2, $xmm3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32x = COPY $xmm3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fr32x = COPY $xmm2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fr32x = COPY $xmm1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fr32x = COPY $xmm0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rdx
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr32 = COPY $edi
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr8 = COPY [[COPY5]].sub_8bit
; CHECK-NEXT: TEST8ri killed [[COPY6]], 1, implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[VCMPSSZrr:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY3]], [[COPY2]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vk32 = COPY [[COPY7]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vk8 = COPY [[COPY8]]
; CHECK-NEXT: JMP_1 %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.else:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[VCMPSSZrr1:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY1]], [[COPY]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr1]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vk32 = COPY [[COPY10]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.exit:
; CHECK-NEXT: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY9]], %bb.1
; CHECK-NEXT: KMOVBmk_EVEX [[COPY4]], 1, $noreg, 0, $noreg, [[PHI]]
; CHECK-NEXT: RET 0
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f1, %f2
  br label %exit

else:
  %cmp2 = fcmp oeq float %f3, %f4
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  store i1 %val, ptr %iptr
  ret void
}
llvm/test/CodeGen/X86/apx/kmov-isel.ll (new file, 103 lines)
@@ -0,0 +1,103 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr --show-mc-encoding | FileCheck --check-prefix=AVX512 %s

define void @bitcast_16i8_store(ptr %p, <16 x i8> %a0) {
; AVX512-LABEL: bitcast_16i8_store:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
; AVX512-NEXT: kmovw %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x91,0x07]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq # encoding: [0xc3]
  %a1 = icmp slt <16 x i8> %a0, zeroinitializer
  %a2 = bitcast <16 x i1> %a1 to i16
  store i16 %a2, ptr %p
  ret void
}

define void @bitcast_32i8_store(ptr %p, <32 x i8> %a0) {
; AVX512-LABEL: bitcast_32i8_store:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
; AVX512-NEXT: kmovd %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x91,0x07]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq # encoding: [0xc3]
  %a1 = icmp slt <32 x i8> %a0, zeroinitializer
  %a2 = bitcast <32 x i1> %a1 to i32
  store i32 %a2, ptr %p
  ret void
}

define void @bitcast_64i8_store(ptr %p, <64 x i8> %a0) {
; AVX512-LABEL: bitcast_64i8_store:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
; AVX512-NEXT: kmovq %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x91,0x07]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq # encoding: [0xc3]
  %a1 = icmp slt <64 x i8> %a0, zeroinitializer
  %a2 = bitcast <64 x i1> %a1 to i64
  store i64 %a2, ptr %p
  ret void
}

define <16 x i1> @bitcast_16i8_load(ptr %p, <16 x i1> %a, <16 x i1> %b) {
; AVX512-LABEL: bitcast_16i8_load:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $7, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x71,0xf1,0x07]
; AVX512-NEXT: vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; AVX512-NEXT: kmovw (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x90,0x0f]
; AVX512-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
; AVX512-NEXT: kandnw %k0, %k1, %k0 # encoding: [0xc5,0xf4,0x42,0xc0]
; AVX512-NEXT: korw %k0, %k2, %k0 # encoding: [0xc5,0xec,0x45,0xc0]
; AVX512-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq # encoding: [0xc3]
  %mask = load i16, ptr %p
  %vmask = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %vmask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %res
}

define <32 x i1> @bitcast_32i8_load(ptr %p, <32 x i1> %a, <32 x i1> %b) {
; AVX512-LABEL: bitcast_32i8_load:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $7, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x71,0xf1,0x07]
; AVX512-NEXT: vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; AVX512-NEXT: kmovd (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x90,0x0f]
; AVX512-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
; AVX512-NEXT: kandnd %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf5,0x42,0xc0]
; AVX512-NEXT: kord %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x45,0xc0]
; AVX512-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq # encoding: [0xc3]
  %mask = load i32, ptr %p
  %vmask = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %vmask, <32 x i1> %a, <32 x i1> %b
  ret <32 x i1> %res
}

define <64 x i1> @bitcast_64i8_load(ptr %p, <64 x i1> %a, <64 x i1> %b) {
; AVX512-LABEL: bitcast_64i8_load:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $7, %zmm1, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xf1,0x07]
; AVX512-NEXT: vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; AVX512-NEXT: kmovq (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0x0f]
; AVX512-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
; AVX512-NEXT: kandnq %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf4,0x42,0xc0]
; AVX512-NEXT: korq %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xec,0x45,0xc0]
; AVX512-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %mask = load i64, ptr %p
  %vmask = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %vmask, <64 x i1> %a, <64 x i1> %b
  ret <64 x i1> %res
}
llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll (new file, 90 lines)
@@ -0,0 +1,90 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=AVX512 %s
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr -show-mc-encoding | FileCheck --check-prefix=AVX512BW %s

define void @kmovkr_1(i1 %cmp23.not) {
; AVX512-LABEL: kmovkr_1:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
; AVX512-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
; AVX512-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: kmovkr_1:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: kmovd %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xcf]
; AVX512BW-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
; AVX512BW-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512BW-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
; AVX512BW-NEXT: retq # encoding: [0xc3]
entry:
  %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
  store double %0, ptr null, align 8
  ret void
}

define void @kmovkr_2() {
; AVX512-LABEL: kmovkr_2:
; AVX512: # %bb.0: # %alloca_21
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT: movw $3, %ax # encoding: [0x66,0xb8,0x03,0x00]
; AVX512-NEXT: kmovw %eax, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xc8]
; AVX512-NEXT: vmovups %zmm0, 0 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: kmovkr_2:
; AVX512BW: # %bb.0: # %alloca_21
; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512BW-NEXT: movw $3, %ax # encoding: [0x66,0xb8,0x03,0x00]
; AVX512BW-NEXT: kmovd %eax, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xc8]
; AVX512BW-NEXT: vmovups %zmm0, 0 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
; AVX512BW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq # encoding: [0xc3]
alloca_21:
  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr null, i32 1, <4 x i1> <i1 true, i1 true, i1 false, i1 false>)
  ret void
}

define i32 @kmovrk_1(<4 x ptr> %arg) {
; AVX512-LABEL: kmovrk_1:
; AVX512: # %bb.0: # %bb
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; AVX512-NEXT: kmovw %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x93,0xc0]
; AVX512-NEXT: testb $15, %al # encoding: [0xa8,0x0f]
; AVX512-NEXT: jne .LBB2_1 # encoding: [0x75,A]
; AVX512-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
; AVX512-NEXT: # %bb.2: # %bb3
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq # encoding: [0xc3]
; AVX512-NEXT: .LBB2_1: # %bb2
;
; AVX512BW-LABEL: kmovrk_1:
; AVX512BW: # %bb.0: # %bb
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; AVX512BW-NEXT: kmovd %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x93,0xc0]
; AVX512BW-NEXT: testb $15, %al # encoding: [0xa8,0x0f]
; AVX512BW-NEXT: jne .LBB2_1 # encoding: [0x75,A]
; AVX512BW-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
; AVX512BW-NEXT: # %bb.2: # %bb3
; AVX512BW-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512BW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq # encoding: [0xc3]
; AVX512BW-NEXT: .LBB2_1: # %bb2
bb:
  %icmp = icmp ne <4 x ptr> %arg, zeroinitializer
  %freeze = freeze <4 x i1> %icmp
  %bitcast = bitcast <4 x i1> %freeze to i4
  %icmp1 = icmp ne i4 %bitcast, 0
  br i1 %icmp1, label %bb2, label %bb3
bb2:
  unreachable
bb3:
  ret i32 0
}

declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr nocapture, i32 immarg, <4 x i1>)
llvm/test/MC/Disassembler/X86/apx/kmov.txt (new file, 82 lines)
@@ -0,0 +1,82 @@
# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL

# ATT: kmovb %r16d, %k1
# INTEL: kmovb k1, r16d
0x62,0xf9,0x7d,0x08,0x92,0xc8

# ATT: kmovw %r16d, %k1
# INTEL: kmovw k1, r16d
0x62,0xf9,0x7c,0x08,0x92,0xc8

# ATT: kmovd %r16d, %k1
# INTEL: kmovd k1, r16d
0x62,0xf9,0x7f,0x08,0x92,0xc8

# ATT: kmovq %r16, %k1
# INTEL: kmovq k1, r16
0x62,0xf9,0xff,0x08,0x92,0xc8

# ATT: kmovb %k1, %r16d
# INTEL: kmovb r16d, k1
0x62,0xe1,0x7d,0x08,0x93,0xc1

# ATT: kmovw %k1, %r16d
# INTEL: kmovw r16d, k1
0x62,0xe1,0x7c,0x08,0x93,0xc1

# ATT: kmovd %k1, %r16d
# INTEL: kmovd r16d, k1
0x62,0xe1,0x7f,0x08,0x93,0xc1

# ATT: kmovq %k1, %r16
# INTEL: kmovq r16, k1
0x62,0xe1,0xff,0x08,0x93,0xc1

# ATT: kmovb (%r16,%r17), %k1
# INTEL: kmovb k1, byte ptr [r16 + r17]
0x62,0xf9,0x79,0x08,0x90,0x0c,0x08

# ATT: kmovw (%r16,%r17), %k1
# INTEL: kmovw k1, word ptr [r16 + r17]
0x62,0xf9,0x78,0x08,0x90,0x0c,0x08

# ATT: kmovd (%r16,%r17), %k1
# INTEL: kmovd k1, dword ptr [r16 + r17]
0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08

# ATT: kmovq (%r16,%r17), %k1
# INTEL: kmovq k1, qword ptr [r16 + r17]
0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08

# ATT: kmovb %k1, (%r16,%r17)
# INTEL: kmovb byte ptr [r16 + r17], k1
0x62,0xf9,0x79,0x08,0x91,0x0c,0x08

# ATT: kmovw %k1, (%r16,%r17)
# INTEL: kmovw word ptr [r16 + r17], k1
0x62,0xf9,0x78,0x08,0x91,0x0c,0x08

# ATT: kmovd %k1, (%r16,%r17)
# INTEL: kmovd dword ptr [r16 + r17], k1
0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08

# ATT: kmovq %k1, (%r16,%r17)
# INTEL: kmovq qword ptr [r16 + r17], k1
0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08

# ATT: {evex} kmovb %k1, %k2
# INTEL: {evex} kmovb k2, k1
0x62,0xf1,0x7d,0x08,0x90,0xd1

# ATT: {evex} kmovw %k1, %k2
# INTEL: {evex} kmovw k2, k1
0x62,0xf1,0x7c,0x08,0x90,0xd1

# ATT: {evex} kmovd %k1, %k2
# INTEL: {evex} kmovd k2, k1
0x62,0xf1,0xfd,0x08,0x90,0xd1

# ATT: {evex} kmovq %k1, %k2
# INTEL: {evex} kmovq k2, k1
0x62,0xf1,0xfc,0x08,0x90,0xd1
llvm/test/MC/X86/apx/kmov-att.s (new file, 69 lines)
@@ -0,0 +1,69 @@
# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR

# ERROR-COUNT-20: error:
# ERROR-NOT: error:
# CHECK: kmovb %r16d, %k1
# CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8]
kmovb %r16d, %k1
# CHECK: kmovw %r16d, %k1
# CHECK: encoding: [0x62,0xf9,0x7c,0x08,0x92,0xc8]
kmovw %r16d, %k1
# CHECK: kmovd %r16d, %k1
# CHECK: encoding: [0x62,0xf9,0x7f,0x08,0x92,0xc8]
kmovd %r16d, %k1
# CHECK: kmovq %r16, %k1
# CHECK: encoding: [0x62,0xf9,0xff,0x08,0x92,0xc8]
kmovq %r16, %k1

# CHECK: kmovb %k1, %r16d
# CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x93,0xc1]
kmovb %k1, %r16d
# CHECK: kmovw %k1, %r16d
# CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x93,0xc1]
kmovw %k1, %r16d
# CHECK: kmovd %k1, %r16d
# CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x93,0xc1]
kmovd %k1, %r16d
# CHECK: kmovq %k1, %r16
# CHECK: encoding: [0x62,0xe1,0xff,0x08,0x93,0xc1]
kmovq %k1, %r16

# CHECK: kmovb (%r16,%r17), %k1
# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x90,0x0c,0x08]
kmovb (%r16,%r17), %k1
# CHECK: kmovw (%r16,%r17), %k1
# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x90,0x0c,0x08]
kmovw (%r16,%r17), %k1
# CHECK: kmovd (%r16,%r17), %k1
# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08]
kmovd (%r16,%r17), %k1
# CHECK: kmovq (%r16,%r17), %k1
# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08]
kmovq (%r16,%r17), %k1

# CHECK: kmovb %k1, (%r16,%r17)
# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x91,0x0c,0x08]
kmovb %k1, (%r16,%r17)
# CHECK: kmovw %k1, (%r16,%r17)
# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x91,0x0c,0x08]
kmovw %k1, (%r16,%r17)
# CHECK: kmovd %k1, (%r16,%r17)
# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08]
kmovd %k1, (%r16,%r17)
# CHECK: kmovq %k1, (%r16,%r17)
# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08]
kmovq %k1, (%r16,%r17)

# CHECK: {evex} kmovb %k1, %k2
# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
{evex} kmovb %k1, %k2
# CHECK: {evex} kmovw %k1, %k2
# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1]
{evex} kmovw %k1, %k2
# CHECK: {evex} kmovd %k1, %k2
# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1]
{evex} kmovd %k1, %k2
# CHECK: {evex} kmovq %k1, %k2
# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
{evex} kmovq %k1, %k2
llvm/test/MC/X86/apx/kmov-intel.s (new file, 66 lines)
@@ -0,0 +1,66 @@
# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s

# CHECK: kmovb k1, r16d
# CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8]
kmovb k1, r16d
# CHECK: kmovw k1, r16d
# CHECK: encoding: [0x62,0xf9,0x7c,0x08,0x92,0xc8]
kmovw k1, r16d
# CHECK: kmovd k1, r16d
# CHECK: encoding: [0x62,0xf9,0x7f,0x08,0x92,0xc8]
kmovd k1, r16d
# CHECK: kmovq k1, r16
# CHECK: encoding: [0x62,0xf9,0xff,0x08,0x92,0xc8]
kmovq k1, r16

# CHECK: kmovb r16d, k1
# CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x93,0xc1]
kmovb r16d, k1
# CHECK: kmovw r16d, k1
# CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x93,0xc1]
kmovw r16d, k1
# CHECK: kmovd r16d, k1
# CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x93,0xc1]
kmovd r16d, k1
# CHECK: kmovq r16, k1
# CHECK: encoding: [0x62,0xe1,0xff,0x08,0x93,0xc1]
kmovq r16, k1

# CHECK: kmovb k1, byte ptr [r16 + r17]
# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x90,0x0c,0x08]
kmovb k1, byte ptr [r16 + r17]
# CHECK: kmovw k1, word ptr [r16 + r17]
# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x90,0x0c,0x08]
kmovw k1, word ptr [r16 + r17]
# CHECK: kmovd k1, dword ptr [r16 + r17]
# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08]
kmovd k1, dword ptr [r16 + r17]
# CHECK: kmovq k1, qword ptr [r16 + r17]
# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08]
kmovq k1, qword ptr [r16 + r17]

# CHECK: kmovb byte ptr [r16 + r17], k1
# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x91,0x0c,0x08]
kmovb byte ptr [r16 + r17], k1
# CHECK: kmovw word ptr [r16 + r17], k1
# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x91,0x0c,0x08]
kmovw word ptr [r16 + r17], k1
# CHECK: kmovd dword ptr [r16 + r17], k1
# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08]
kmovd dword ptr [r16 + r17], k1
# CHECK: kmovq qword ptr [r16 + r17], k1
# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08]
kmovq qword ptr [r16 + r17], k1

# CHECK: {evex} kmovb k2, k1
# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
{evex} kmovb k2, k1
# CHECK: {evex} kmovw k2, k1
# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1]
{evex} kmovw k2, k1
# CHECK: {evex} kmovd k2, k1
# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1]
{evex} kmovd k2, k1
# CHECK: {evex} kmovq k2, k1
# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
{evex} kmovq k2, k1
|
@ -487,9 +487,13 @@ static const X86FoldTableEntry Table1[] = {
|
||||
{X86::IMUL64rri32, X86::IMUL64rmi32, 0},
|
||||
{X86::IMUL64rri8, X86::IMUL64rmi8, 0},
|
||||
{X86::KMOVBkk, X86::KMOVBkm, TB_NO_REVERSE},
|
||||
{X86::KMOVBkk_EVEX, X86::KMOVBkm_EVEX, TB_NO_REVERSE},
|
||||
{X86::KMOVDkk, X86::KMOVDkm, 0},
|
||||
{X86::KMOVDkk_EVEX, X86::KMOVDkm_EVEX, 0},
|
||||
{X86::KMOVQkk, X86::KMOVQkm, 0},
|
||||
{X86::KMOVQkk_EVEX, X86::KMOVQkm_EVEX, 0},
|
||||
{X86::KMOVWkk, X86::KMOVWkm, 0},
|
||||
{X86::KMOVWkk_EVEX, X86::KMOVWkm_EVEX, 0},
|
||||
{X86::LWPINS32rri, X86::LWPINS32rmi, 0},
|
||||
{X86::LWPINS64rri, X86::LWPINS64rmi, 0},
|
||||
{X86::LWPVAL32rri, X86::LWPVAL32rmi, 0},
|
||||
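These fold-table rows teach load folding about the new variants. A tiny self-contained C++ model of what a row means (opcode values invented; only the table's shape mirrors the diff): the register-form opcode maps to its memory form, and the _EVEX variants get their own rows so folding never silently switches encoding spaces.

#include <cstdint>
#include <cstdio>
#include <map>

// Sketch with made-up opcode values, not LLVM's fold tables.
enum Opc : uint16_t { KMOVBkk, KMOVBkm, KMOVBkk_EVEX, KMOVBkm_EVEX };

const std::map<Opc, Opc> FoldTable = {
    {KMOVBkk, KMOVBkm},           // VEX register form -> VEX memory form
    {KMOVBkk_EVEX, KMOVBkm_EVEX}, // EVEX register form -> EVEX memory form
};

int main() {
  printf("%d\n", FoldTable.at(KMOVBkk_EVEX)); // 3 -> KMOVBkm_EVEX
}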