llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
Aiden Grossman 069f8121e0
[X86] Add RCU for Skylake Models (#153832)
We cannot actually retire an infinite number of uops per cycle. This
patch adds a RCU to the skylake scheduling model to fix this. I'm
purposefully using a loose upper bound here. We're unlikely to actually
get four fused uops per cycle, but this is better than not setting
anything. Most realistic code I've put through uiCA will retire up to ~6
uops per cycle.

Information taken from
https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client).

This requires modification of the two zero idiom tests because we do not
currently model the CPU frontend which would likely be the actual
bottleneck in that case.

Related to #153747.
2025-08-15 10:33:26 -07:00

2567 lines
112 KiB
TableGen

//=- X86SchedSkylakeServer.td - Skylake Server Scheduling ----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Skylake Server to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
// Top-level machine model record for Skylake Server. The ports, read advances
// and per-write resource data that follow are bound to it through the
// `let SchedModel = SkylakeServerModel in` region.
def SkylakeServerModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and Skylake can
// decode 6 instructions per cycle.
let IssueWidth = 6;
let MicroOpBufferSize = 224; // Based on the reorder buffer.
// Same 5-cycle value that the integer ReadAfterLd advance and WriteLoad use
// further down in this file.
let LoadLatency = 5;
// NOTE(review): presumably the branch-mispredict cost in cycles - confirm
// against the SchedMachineModel definition.
let MispredictPenalty = 14;
// Based on the LSD (loop-stream detector) queue size and benchmarking data.
let LoopMicroOpBufferSize = 50;
// This flag is set to allow the scheduler to assign a default model to
// unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = SkylakeServerModel in {
// Skylake Server can issue micro-ops to 8 different ports in one cycle.
// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores. Port 7 can handle address calculations.
// One resource with a single unit for each of the eight execution ports.
def SKXPort0 : ProcResource<1>;
def SKXPort1 : ProcResource<1>;
def SKXPort2 : ProcResource<1>;
def SKXPort3 : ProcResource<1>;
def SKXPort4 : ProcResource<1>;
def SKXPort5 : ProcResource<1>;
def SKXPort6 : ProcResource<1>;
def SKXPort7 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
// Each group below collects the ports named by the digits in its suffix
// (e.g. SKXPort0156 = ports 0, 1, 5 and 6).
def SKXPort01 : ProcResGroup<[SKXPort0, SKXPort1]>;
def SKXPort23 : ProcResGroup<[SKXPort2, SKXPort3]>;
def SKXPort237 : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>;
def SKXPort04 : ProcResGroup<[SKXPort0, SKXPort4]>;
def SKXPort05 : ProcResGroup<[SKXPort0, SKXPort5]>;
def SKXPort06 : ProcResGroup<[SKXPort0, SKXPort6]>;
def SKXPort15 : ProcResGroup<[SKXPort1, SKXPort5]>;
def SKXPort16 : ProcResGroup<[SKXPort1, SKXPort6]>;
def SKXPort56 : ProcResGroup<[SKXPort5, SKXPort6]>;
def SKXPort015 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>;
def SKXPort056 : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>;
def SKXPort0156: ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>;
// The dividers are separate resources rather than members of a port group.
// Writes that use them (e.g. WriteDiv8, WriteFDiv) list SKXPort0 and the
// divider side by side, each with its own cycle count.
def SKXDivider : ProcResource<1>; // Integer division issued on port 0.
// FP division and sqrt on port 0.
def SKXFPDivider : ProcResource<1>;
// 60 Entry Unified Scheduler
// Group spanning all eight ports; BufferSize carries the 60-entry figure.
def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
SKXPort5, SKXPort6, SKXPort7]> {
let BufferSize=60;
}
// Skylake can retire up to four (potentially fused) uops per cycle. Set the
// limit to twice that, because we do not model a fused uop as taking up only
// one retirement slot. The first argument (the reorder buffer size) is left at
// 0 because I could not find any documented sources on how many in-flight
// micro-ops can be tracked.
def SKXRCU : RetireControlUnit<0, 8>;
// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;
// ReadInt2Fpu gets no advance (0 cycles).
// NOTE(review): presumably this read marks integer-register operands consumed
// by FP/vector instructions - confirm against its definition in X86Schedule.td.
def : ReadAdvance<ReadInt2Fpu, 0>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Emits two WriteRes records for one foldable SchedWrite: the register form
// and the folded-load form (SchedRW.Folded).
//   SchedRW  - the foldable write being described.
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - ReleaseAtCycles for ExePorts, one entry per resource
//              (default [1]).
//   UOps     - micro-op count of the register form (default 1).
//   LoadLat  - latency added for the folded-load form (default 5).
//   LoadUOps - micro-ops added for the folded-load form (default 1).
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
int LoadLat = 5, int LoadUOps = 1> {
// Register variant: uses ExePorts with the cycle counts given by Res (a
// single cycle on a single port when the defaults are used).
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
// the latency (default = 5). SKXPort23 and its single cycle are prepended to
// ExePorts/Res, and LoadUOps is added to the micro-op count.
def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.
// Integer multiplication.
defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
// WriteIMulH is modeled with latency only: its resource list is empty, so it
// occupies no port.
// NOTE(review): presumably this write stands for the high half of a multiply
// result - confirm against X86Schedule.td.
def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// Load form: the register-form latency above plus the model's LoadLatency.
def : WriteRes<WriteIMulHLd, []> {
let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
}
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;
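// TODO: Why isn't the SKXDivider used by all of the division writes below
// (e.g. the 16/32/64-bit register forms and the IDIV load forms)?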
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SKXPort06]>; // Setcc.
def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>;
defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;
// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteBSR, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;
// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;
// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// It does not consume any resources by itself; it only has latency, matching
// that of WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;
// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare.
defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>;
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>;
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>;
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>;
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.
defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>;
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>;
defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : X86WriteRes<WriteDPPS, [SKXPort5,SKXPort01], 13, [1,3], 4>;
defm : X86WriteRes<WriteDPPSY, [SKXPort5,SKXPort01], 13, [1,3], 4>;
defm : X86WriteRes<WriteDPPSLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 19, [1,3,1,1], 6>;
defm : X86WriteRes<WriteDPPSYLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 20, [1,3,1,1], 6>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>;
defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;
defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort0,SKXPort5], 1, [1,1], 2, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>;
// NOTE(review): unlike the neighbouring Y/Z entries (e.g. WriteBlendY/Z and
// WriteMPSADY/Z, which pass 7), the Y and Z variants here pass a folded-load
// latency of 6, and the Z variant is a single uop on SKXPort05 while the
// 128-bit and Y variants are two uops on SKXPort015 - confirm this is intended.
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>;
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>; // TODO: 512-bit ops require ports 0/1 to be joined.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector integer shifts.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>;
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;
// Vector insert/extract operations.
// Insert into a vector register: two uops that together hold port 5 for two
// cycles.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
// Insert with the source coming from memory: port 5 plus a port 2/3 load.
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
// Opcodes matching (V?)MOV(H|L)(PD|PS)rm are scheduled with the
// WriteVecInsertLd data above.
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
// Extract from a vector register: one uop each on port 0 and port 5.
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 2;
}
// Extract straight to memory: port 5 plus store data (port 4) and store
// address (port 2/3/7).
def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort5,SKXPort01], 5, [1,1], 2, 5>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort5,SKXPort01], 5, [1,1], 2, 4>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort5,SKXPort05], 7, [1,1], 2, 7>;
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SKXPort0]> {
let Latency = 10;
let NumMicroOps = 3;
let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23]> {
let Latency = 16;
let NumMicroOps = 4;
let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort5, SKXPort015, SKXPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015, SKXPort0156]> {
let Latency = 25;
let NumMicroOps = 10;
let ReleaseAtCycles = [4,3,1,1,1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SKXPort0]> {
let Latency = 10;
let NumMicroOps = 3;
let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23]> {
let Latency = 16;
let NumMicroOps = 4;
let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SKXPort0,SKXPort5,SKXPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156]> {
let Latency = 24;
let NumMicroOps = 9;
let ReleaseAtCycles = [4,3,1,1];
}
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSKY, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> { let Latency = 2; }
// AES instructions.
def : WriteRes<WriteAESDecEnc, [SKXPort0]> { // Decryption, encryption.
let Latency = 4;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [SKXPort0, SKXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESIMC, [SKXPort0]> { // InvMixColumn.
let Latency = 8;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SKXPort0, SKXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [SKXPort0,SKXPort5,SKXPort015]> { // Key Generation.
let Latency = 20;
let NumMicroOps = 11;
let ReleaseAtCycles = [3,6,2];
}
// Key generation with a load: adds one port 2/3 cycle and 5 cycles of latency.
// NOTE(review): NumMicroOps stays at 11 and the SKXPort015 cycles drop from 2
// to 1 relative to the register form, whereas the other AES load forms
// (WriteAESDecEncLd, WriteAESIMCLd) add one uop - confirm this is intended.
def : WriteRes<WriteAESKeyGenLd, [SKXPort0,SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 25;
let NumMicroOps = 11;
let ReleaseAtCycles = [3,6,1,1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SKXPort5]> {
let Latency = 6;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
let Latency = 12;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVPMOV256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
// Old microcoded instructions that nobody uses.
// Modeled with a flat latency of 100 on SKXPort0156, the same values used for
// WriteSystem.
def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
// Fence instructions.
// 2 micro-ops, one each on SKXPort23 and SKXPort4; Latency is not overridden.
def : WriteRes<WriteFence, [SKXPort23, SKXPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; }
// Load/store MXCSR.
// Both directions are 3 micro-ops with one cycle on each listed resource;
// the load is modeled with latency 7 and the store with latency 2.
def : WriteRes<WriteLDMXCSR, [SKXPort0,SKXPort23,SKXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
def : WriteRes<WriteSTMXCSR, [SKXPort4,SKXPort5,SKXPort237]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Nop, not very useful except that it provides a model for nops!
// The resource list is empty, so no SKXPort* resource is charged.
def : WriteRes<WriteNop, []>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
// Every entry charges SKXPort5 for 2 cycles plus 1 cycle on a second resource
// (SKXPort01, SKXPort05 or SKXPort015) and passes 3 as the fifth argument.
// The FP variants pass 6 as the third argument, the integer variants pass 3.
// NOTE(review): argument meanings come from the SKXWriteResPair multiclass,
// which is defined outside this section - presumably (write, ports, latency,
// release cycles, micro-ops, load latency); confirm there.
defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort01], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort01], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort05], 3, [2,1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
// Group1: 1 micro-op on SKXPort0, latency 1.
// Assigned to the K* "kk" logic/move forms, KSET0/KSET1, a set of MMX "rr"
// saturating add/sub, average, compare and min/max opcodes, and the
// VPMOV{B,D,Q,W}2M "kr" forms.
def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)kk",
"KANDN(B|D|Q|W)kk",
"KMOV(B|D|Q|W)kk",
"KNOT(B|D|Q|W)kk",
"KOR(B|D|Q|W)kk",
"KXNOR(B|D|Q|W)kk",
"KXOR(B|D|Q|W)kk",
"KSET0(B|D|Q|W)", // Same as KXOR
"KSET1(B|D|Q|W)", // Same as KXNOR
"MMX_PADDS(B|W)rr",
"MMX_PADDUS(B|W)rr",
"MMX_PAVG(B|W)rr",
"MMX_PCMPEQ(B|D|W)rr",
"MMX_PCMPGT(B|D|W)rr",
"MMX_P(MAX|MIN)SWrr",
"MMX_P(MAX|MIN)UBrr",
"MMX_PSUBS(B|W)rr",
"MMX_PSUBUS(B|W)rr",
"VPMOVB2M(Z|Z128|Z256)kr",
"VPMOVD2M(Z|Z128|Z256)kr",
"VPMOVQ2M(Z|Z128|Z256)kr",
"VPMOVW2M(Z|Z128|Z256)kr")>;
// Group3: 1 micro-op on SKXPort5, latency 1.
// Assigned to COM_FST0r/COMP_FST0r, UCOM_Fr/UCOM_FPr and the KMOV*kr forms.
def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r",
"KMOV(B|D|Q|W)kr",
"UCOM_F(P?)r")>;
// Group4: 1 micro-op on SKXPort6, latency 1. Assigned to JMP(16|32|64)r.
def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
// Group6: 1 micro-op on SKXPort05, latency 1. Assigned to FINCSTP and FNOP.
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;
// Group7: 1 micro-op on SKXPort06, latency 1. Assigned to CDQ, CQO, CLAC, STAC.
def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
// Group8: 1 micro-op on SKXPort15, latency 1. Assigned to ANDN(32|64)rr.
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
// Group9: 1 micro-op on SKXPort015, latency 1.
// Assigned to register forms of VBLENDMP*, VPADD*/PADD*, VPBLENDD, VPBLENDM*,
// VPSUB* and VPTERNLOG*.
// NOTE(review): the VPSUB pattern ends in "rrk" whereas the neighbouring VPADD
// pattern ends in "rr", and there is no "(V?)PSUB(B|D|Q|W)rr" entry here -
// confirm whether that asymmetry is intentional.
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
"VBLENDMPS(Z128|Z256)rr",
"VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
"(V?)PADD(B|D|Q|W)rr",
"VPBLENDD(Y?)rri",
"VPBLENDMB(Z128|Z256)rr",
"VPBLENDMD(Z128|Z256)rr",
"VPBLENDMQ(Z128|Z256)rr",
"VPBLENDMW(Z128|Z256)rr",
"VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
"VPTERNLOGD(Z|Z128|Z256)rri",
"VPTERNLOGQ(Z|Z128|Z256)rri")>;
// Group10: 1 micro-op on SKXPort0156, latency 1.
// Assigned to SGDT64m, SIDT64m, SMSW16m, STRm and SYSCALL.
def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs SGDT64m,
SIDT64m,
SMSW16m,
STRm,
SYSCALL)>;
// Group11: 2 micro-ops, one each on SKXPort4 and SKXPort237, latency 1.
// Assigned to FBSTPm, VMPTRSTm, the KMOV*mk forms and ST_FP(32|64|80)m.
def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
"ST_FP(32|64|80)m")>;
// Group13: 2 micro-ops, both on SKXPort5, latency 2. Assigned to MMX_MOVQ2DQrr.
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
// Group14: 2 micro-ops, both on SKXPort05, latency 2.
// Assigned to FDECSTP and MMX_MOVDQ2Qrr.
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
MMX_MOVDQ2Qrr)>;
// Group17: 2 micro-ops, both on SKXPort0156, latency 2.
// Assigned to LFENCE, WAIT and XGETBV.
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup17], (instrs LFENCE,
WAIT,
XGETBV)>;
// Group20: 2 micro-ops, one each on SKXPort6 and SKXPort0156, latency 2.
// Assigned to opcodes matching "CLFLUSH".
def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>;
// Group23: 2 micro-ops, one each on SKXPort06 and SKXPort0156, latency 2.
// Assigned to CWD, the J*CXZ branches and the accumulator-immediate ADC/SBB
// forms (8i8, 16i16, 32i32, 64i32).
def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup23], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
ADC8i8, SBB8i8,
ADC16i16, SBB16i16,
ADC32i32, SBB32i32,
ADC64i32, SBB64i32)>;
// Group25: 3 micro-ops, one each on SKXPort4, SKXPort6 and SKXPort237,
// latency 2. Assigned to FNSTCW16m.
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup25], (instrs FNSTCW16m)>;
// Group27: 3 micro-ops, one each on SKXPort4, SKXPort237 and SKXPort15,
// latency 2. Assigned to MOVBE(16|32|64)mr.
def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
// Group28: 3 micro-ops, one each on SKXPort4, SKXPort237 and SKXPort0156,
// latency 2. Assigned to register/imm8 PUSH forms, STOS* and PUSH*rmr.
def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
// Group29: 5 micro-ops split 2/2/1 across SKXPort4, SKXPort237 and SKXPort15,
// latency 2. Assigned to VMOVDQU8Zmr and, via the optional "b", VMOVDQU8Zmrb.
def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
let NumMicroOps = 5;
let ReleaseAtCycles = [2,2,1];
}
def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
// Group30: 1 micro-op on SKXPort0, latency 3.
// Assigned to KMOV*rk, KORTEST*kk and KTEST*kk.
def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
let Latency = 3;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
"KORTEST(B|D|Q|W)kk",
"KTEST(B|D|Q|W)kk")>;
// Group31: 1 micro-op on SKXPort1, latency 3.
// Assigned to PDEP(32|64)rr and PEXT(32|64)rr.
def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
// Group32: 1 micro-op on SKXPort5, latency 3.
// Assigned to the x87 ADD/SUB/SUBR register forms, VALIGND/VALIGNQ rri,
// VPBROADCAST(B|W)rr and the VP(MAX|MIN)(S|U)Q rr forms.
def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"VALIGND(Z|Z128|Z256)rri",
"VALIGNQ(Z|Z128|Z256)rri",
"VPBROADCAST(B|W)rr",
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
// Group33: 1 micro-op on SKXPort5, latency 4.
// Assigned to KADD/KSHIFTL/KSHIFTR/KUNPCK and to the register forms of the
// EVEX VCMP*, VFPCLASS*, VPCMP* and VPTEST(N)M* opcodes listed below.
def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)kk",
"KSHIFTL(B|D|Q|W)ki",
"KSHIFTR(B|D|Q|W)ki",
"KUNPCK(BW|DQ|WD)kk",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
"VCMP(SD|SS)Zrr",
"VFPCLASS(PD|PS)(Z|Z128|Z256)ri",
"VFPCLASS(SD|SS)Zri",
"VPCMPB(Z|Z128|Z256)rri",
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
"VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
"VPCMPQ(Z|Z128|Z256)rri",
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
// Group34: 2 micro-ops, one each on SKXPort0 and SKXPort0156, latency 3.
// Assigned to FNSTSW16r.
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;
// Group37: 3 micro-ops (1 on SKXPort0, 2 on SKXPort5), latency 3.
// Assigned to MMX_PHADDSWrr and MMX_PHSUBSWrr.
def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
// Group38: 3 micro-ops (2 on SKXPort5, 1 on SKXPort01), latency 3.
// Assigned to the PHADDSW/PHSUBSW register forms, with optional V prefix and
// Y suffix.
def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 3;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
// Group41: 2 micro-ops, both on SKXPort5, latency 2.
// Assigned to the MMX pack register forms (PACKSSDW, PACKSSWB, PACKUSWB).
def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWrr,
MMX_PACKSSWBrr,
MMX_PACKUSWBrr)>;
// Group42: 3 micro-ops (1 on SKXPort6, 2 on SKXPort0156), latency 3.
// Assigned to opcodes matching "CLD".
def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>;
// Group44: 3 micro-ops (1 on SKXPort06, 2 on SKXPort0156), latency 2.
// Assigned to the rotate-through-carry-by-1 register forms (RCL*r1, RCR*r1).
def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
// Group44b: 8 micro-ops split 2/4/2 across SKXPort1, SKXPort06 and
// SKXPort0156, latency 5. Assigned to the RCR*ri (rotate by immediate) forms.
def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 8;
let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
// Group44c: 8 micro-ops split 2/4/2 across SKXPort1, SKXPort06 and
// SKXPort0156, latency 6. Assigned to the RCL*ri (rotate by immediate) forms.
def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 8;
let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
// Group45: 3 micro-ops, one each on SKXPort0, SKXPort4 and SKXPort237,
// latency 3. Assigned to FNSTSWm.
def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
let Latency = 3;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;
// Group47: 4 micro-ops, one each on SKXPort4, SKXPort6, SKXPort237 and
// SKXPort0156, latency 3. Assigned to CALL(16|32|64)r.
def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
// Group48: 4 micro-ops, one each on SKXPort4, SKXPort237, SKXPort06 and
// SKXPort0156, latency 3. Assigned to CALL64pcrel32.
def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup48], (instrs CALL64pcrel32)>;
// Group49: 1 micro-op on SKXPort0, latency 4.
// Assigned to the x87 MUL register forms (MUL_FPrST0, MUL_FST0r, MUL_FrST0).
def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
let Latency = 4;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
// Group50: 1 micro-op on SKXPort01, latency 4.
// Assigned to register forms of the FP-to-integer conversions listed below for
// the unsuffixed, Y, Z128 and Z256 opcode variants; the plain "Z" variants are
// assigned separately to SKXWriteResGroup50z.
def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
let Latency = 4;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
"VCVTPD2UQQ(Z128|Z256)rr",
"VCVTPS2DQ(Y|Z128|Z256)rr",
"(V?)CVTPS2DQrr",
"VCVTPS2UDQ(Z128|Z256)rr",
"VCVTTPD2QQ(Z128|Z256)rr",
"VCVTTPD2UQQ(Z128|Z256)rr",
"VCVTTPS2DQ(Z128|Z256)rr",
"(V?)CVTTPS2DQrr",
"VCVTTPS2UDQ(Z128|Z256)rr")>;
// Group50z: 1 micro-op on SKXPort05, latency 4.
// Assigned to the "Zrr" variants of the conversions that SKXWriteResGroup50
// assigns for narrower variants; the resource is SKXPort05 here rather than
// SKXPort01.
def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
let Latency = 4;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50z], (instrs VCVTPD2QQZrr,
VCVTPD2UQQZrr,
VCVTPS2DQZrr,
VCVTPS2UDQZrr,
VCVTTPD2QQZrr,
VCVTTPD2UQQZrr,
VCVTTPS2DQZrr,
VCVTTPS2UDQZrr)>;
// Group51: 2 micro-ops, both on SKXPort5, latency 4.
// Assigned to register forms of VEXPANDP*/VPEXPAND* and of the VPMOV*
// down-converting moves listed below.
// NOTE(review): VPMOVUSQW...rr is not in this list although the matching "mr"
// pattern appears in SKXWriteResGroup66 - confirm whether that is intentional.
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VEXPANDPS(Z|Z128|Z256)rr",
"VPEXPANDD(Z|Z128|Z256)rr",
"VPEXPANDQ(Z|Z128|Z256)rr",
"VPMOVDB(Z|Z128|Z256)rr",
"VPMOVDW(Z|Z128|Z256)rr",
"VPMOVQB(Z|Z128|Z256)rr",
"VPMOVQW(Z|Z128|Z256)rr",
"VPMOVSDB(Z|Z128|Z256)rr",
"VPMOVSDW(Z|Z128|Z256)rr",
"VPMOVSQB(Z|Z128|Z256)rr",
"VPMOVSQD(Z|Z128|Z256)rr",
"VPMOVSQW(Z|Z128|Z256)rr",
"VPMOVSWB(Z|Z128|Z256)rr",
"VPMOVUSDB(Z|Z128|Z256)rr",
"VPMOVUSDW(Z|Z128|Z256)rr",
"VPMOVUSQB(Z|Z128|Z256)rr",
"VPMOVUSQD(Z|Z128|Z256)rr",
"VPMOVUSWB(Z|Z128|Z256)rr",
"VPMOVWB(Z|Z128|Z256)rr")>;
// Group54: 3 micro-ops, one each on SKXPort4, SKXPort5 and SKXPort237,
// latency 4. Assigned to the x87 IST/ISTT memory forms and VPMOVQD*mr(b).
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m",
"VPMOVQD(Z|Z128|Z256)mr(b?)")>;
// Group55: 4 micro-ops, all on SKXPort0156, latency 4. Assigned to FNCLEX.
def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
let ReleaseAtCycles = [4];
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;
// Group56: 4 micro-ops with an empty resource list and latency 0, so no
// SKXPort* resource is charged. Assigned to VZEROUPPER.
def SKXWriteResGroup56 : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 4;
let ReleaseAtCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;
// Group57: 4 micro-ops split 1/1/2 across SKXPort1, SKXPort6 and SKXPort0156,
// latency 4. Assigned to LAR(16|32|64)rr.
def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
// Group61: 2 micro-ops, one each on SKXPort5 and SKXPort01, latency 5.
// Assigned to register forms of the MMX, unsuffixed (SSE/AVX) and Z128/scalar-Z
// conversion opcodes listed below.
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 5;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
"MMX_CVT(T?)PS2PIrr",
"VCVTDQ2PDZ128rr",
"VCVTPD2DQZ128rr",
"(V?)CVT(T?)PD2DQrr",
"VCVTPD2UDQZ128rr",
"VCVTPS2PDZ128rr",
"(V?)CVTPS2PDrr",
"VCVTPS2QQZ128rr",
"VCVTPS2UQQZ128rr",
"VCVTQQ2PSZ128rr",
"(V?)CVTSI(64)?2SDrr",
"VCVTSI2SSZrr",
"(V?)CVTSI2SSrr",
"VCVTSI(64)?2SDZrr",
"VCVTSS2SDZrr",
"(V?)CVTSS2SDrr",
"VCVTTPD2DQZ128rr",
"VCVTTPD2UDQZ128rr",
"VCVTTPS2QQZ128rr",
"VCVTTPS2UQQZ128rr",
"VCVTUDQ2PDZ128rr",
"VCVTUQQ2PSZ128rr",
"VCVTUSI2SSZrr",
"VCVTUSI(64)?2SDZrr")>;
// Group62: 3 micro-ops (2 on SKXPort5, 1 on SKXPort015), latency 5.
// Assigned to VPCONFLICTQZ128rr.
def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
// Group63: 3 micro-ops, one each on SKXPort1, SKXPort6 and SKXPort06,
// latency 5. Assigned to STR(16|32|64)r.
def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
let Latency = 5;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
// Group65: 3 micro-ops, one each on SKXPort4, SKXPort237 and SKXPort01,
// latency 5. Assigned to the VCVTPS2PH Z128/Z256/Z "mr" forms (optionally "b").
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort01]> {
let Latency = 5;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
"VCVTPS2PHZ256mr(b?)",
"VCVTPS2PHZmr(b?)")>;
// Group66: 4 micro-ops split 1/2/1 across SKXPort4, SKXPort5 and SKXPort237,
// latency 5. Assigned to the "mr" forms of the VPMOV* down-converting moves
// listed below.
def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 5;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
"VPMOVDW(Z|Z128|Z256)mr(b?)",
"VPMOVQB(Z|Z128|Z256)mr(b?)",
"VPMOVQW(Z|Z128|Z256)mr(b?)",
"VPMOVSDB(Z|Z128|Z256)mr(b?)",
"VPMOVSDW(Z|Z128|Z256)mr(b?)",
"VPMOVSQB(Z|Z128|Z256)mr(b?)",
"VPMOVSQD(Z|Z128|Z256)mr(b?)",
"VPMOVSQW(Z|Z128|Z256)mr(b?)",
"VPMOVSWB(Z|Z128|Z256)mr(b?)",
"VPMOVUSDB(Z|Z128|Z256)mr(b?)",
"VPMOVUSDW(Z|Z128|Z256)mr(b?)",
"VPMOVUSQB(Z|Z128|Z256)mr(b?)",
"VPMOVUSQD(Z|Z128|Z256)mr(b?)",
"VPMOVUSQW(Z|Z128|Z256)mr(b?)",
"VPMOVUSWB(Z|Z128|Z256)mr(b?)",
"VPMOVWB(Z|Z128|Z256)mr(b?)")>;
// Group67: 5 micro-ops (1 on SKXPort06, 4 on SKXPort0156), latency 5.
// Assigned to XSETBV.
def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
let ReleaseAtCycles = [1,4];
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;
// Group69: 6 micro-ops split 1/1/4 across SKXPort4, SKXPort237 and
// SKXPort0156, latency 5. Assigned to PUSHF16 and PUSHF64.
// NOTE(review): the pattern lists only 16 and 64; confirm whether a 32-bit
// PUSHF opcode exists and should be covered as well.
def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF(16|64)")>;
// Group71: 1 micro-op on SKXPort23, latency 6.
// Assigned to the "rm" forms of VBROADCASTSS, VPBROADCASTD/Q and
// (V)MOVSHDUP/MOVSLDUP/MOVDDUP.
def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
let Latency = 6;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
VPBROADCASTDrm,
VPBROADCASTQrm)>;
def: InstRW<[SKXWriteResGroup71], (instregex "(V?)MOVSHDUPrm",
"(V?)MOVSLDUPrm",
"(V?)MOVDDUPrm")>;
// Group72: 2 micro-ops, both on SKXPort5, latency 6.
// Assigned to MMX_CVTPI2PSrr and the register forms of VCOMPRESSP*,
// VPCOMPRESS* and VPERMW.
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
"VCOMPRESSPS(Z|Z128|Z256)rr",
"VPCOMPRESSD(Z|Z128|Z256)rr",
"VPCOMPRESSQ(Z|Z128|Z256)rr",
"VPERMW(Z|Z128|Z256)rr")>;
// Group73: 2 micro-ops, one each on SKXPort0 and SKXPort23, latency 6.
// Assigned to the "rm" forms of the MMX saturating add/sub, average, compare
// and min/max opcodes whose "rr" forms are assigned to SKXWriteResGroup1.
def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBrm,
MMX_PADDSWrm,
MMX_PADDUSBrm,
MMX_PADDUSWrm,
MMX_PAVGBrm,
MMX_PAVGWrm,
MMX_PCMPEQBrm,
MMX_PCMPEQDrm,
MMX_PCMPEQWrm,
MMX_PCMPGTBrm,
MMX_PCMPGTDrm,
MMX_PCMPGTWrm,
MMX_PMAXSWrm,
MMX_PMAXUBrm,
MMX_PMINSWrm,
MMX_PMINUBrm,
MMX_PSUBSBrm,
MMX_PSUBSWrm,
MMX_PSUBUSBrm,
MMX_PSUBUSWrm)>;
// Group76: 2 micro-ops, one each on SKXPort6 and SKXPort23, latency 6.
// Assigned to FARJMP64m and JMP(16|32|64)m.
def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
// Group79: 2 micro-ops, one each on SKXPort23 and SKXPort15, latency 6.
// Assigned to ANDN(32|64)rm and MOVBE(16|32|64)rm.
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
"MOVBE(16|32|64)rm")>;
// Group80: 2 micro-ops, one each on SKXPort23 and SKXPort015, latency 6.
// Assigned to VMOV64toPQIZrm / VMOVQI2PQIZrm (optionally "b") and VMOVDI2PDIZrm.
def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
// Group81: 2 micro-ops, one each on SKXPort23 and SKXPort0156, latency 6.
// Assigned to POP16r/POP32r/POP64r and POP(16|32|64)rmr.
def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
// Group82: 3 micro-ops (2 on SKXPort5, 1 on SKXPort01), latency 6.
// Assigned to the 64-bit-integer-to-SS conversions (CVTSI642SS, VCVTSI642SSZ,
// VCVTUSI642SSZ) in register form.
def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 6;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
"VCVTSI642SSZrr",
"VCVTUSI642SSZrr")>;
// Group84: 4 micro-ops, one each on SKXPort1, SKXPort6, SKXPort06 and
// SKXPort0156, latency 6. Assigned to SLDT(16|32|64)r.
def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
// Group86: 4 micro-ops, one each on SKXPort4, SKXPort23, SKXPort237 and
// SKXPort06, latency 6. Assigned to SAR/SHL/SHR memory forms with a count of
// 1 or an immediate ("m1" / "mi").
def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 6;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
"SHR(8|16|32|64)m(1|i)")>;
// Group87: 4 micro-ops, one each on SKXPort4, SKXPort23, SKXPort237 and
// SKXPort0156, latency 6. Assigned to POP(16|32|64)rmm and PUSH(16|32|64)rmm.
def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
// Group88: 6 micro-ops (1 on SKXPort6, 5 on SKXPort0156), latency 6.
// Assigned to STD.
def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 6;
let ReleaseAtCycles = [1,5];
}
def: InstRW<[SKXWriteResGroup88], (instrs STD)>;
// Group89: 1 micro-op on SKXPort23, latency 7.
// Assigned to LD_F(32|64|80)m and to the listed VBROADCAST*/VMOV*DUP/
// VPBROADCAST* "rm" opcodes (128-bit-lane broadcasts and Y-suffixed forms).
def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
let Latency = 7;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128rm,
VBROADCASTI128rm,
VBROADCASTSDYrm,
VBROADCASTSSYrm,
VMOVDDUPYrm,
VMOVSHDUPYrm,
VMOVSLDUPYrm,
VPBROADCASTDYrm,
VPBROADCASTQYrm)>;
// Group90: 2 micro-ops, one each on SKXPort01 and SKXPort5, latency 7.
// Assigned to VCVTDQ2PDYrr.
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
// Group92: 2 micro-ops, one each on SKXPort5 and SKXPort23, latency 7.
// Assigned to VMOVSDZrm and VMOVSSZrm (optionally with a "b" suffix).
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
"VMOVSSZrm(b?)")>;
// Group92a: same resources as SKXWriteResGroup92 (SKXPort5 + SKXPort23,
// 2 micro-ops) but latency 6. Assigned to the PMOVSX/PMOVZX "rm" forms with an
// optional V prefix.
def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
"(V?)PMOV(SX|ZX)BQrm",
"(V?)PMOV(SX|ZX)BWrm",
"(V?)PMOV(SX|ZX)DQrm",
"(V?)PMOV(SX|ZX)WDrm",
"(V?)PMOV(SX|ZX)WQrm")>;
// Group93: 2 micro-ops, one each on SKXPort5 and SKXPort01, latency 7.
// Assigned to the Y / Z256 register forms of the conversions listed below; the
// plain "Z" variants are assigned separately to SKXWriteResGroup93z.
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 7;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
"VCVTPD2DQ(Y|Z256)rr",
"VCVTPD2UDQZ256rr",
"VCVTPS2PD(Y|Z256)rr",
"VCVTPS2QQZ256rr",
"VCVTPS2UQQZ256rr",
"VCVTQQ2PSZ256rr",
"VCVTTPD2DQ(Y|Z256)rr",
"VCVTTPD2UDQZ256rr",
"VCVTTPS2QQZ256rr",
"VCVTTPS2UQQZ256rr",
"VCVTUDQ2PDZ256rr",
"VCVTUQQ2PSZ256rr")>;
// Group93z: 2 micro-ops, one each on SKXPort5 and SKXPort05, latency 7.
// Assigned to the "Zrr" variants of the conversions that SKXWriteResGroup93
// assigns for Y / Z256; the second resource is SKXPort05 here rather than
// SKXPort01.
def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> {
let Latency = 7;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
VCVTPD2DQZrr,
VCVTPD2UDQZrr,
VCVTPS2PDZrr,
VCVTPS2QQZrr,
VCVTPS2UQQZrr,
VCVTQQ2PSZrr,
VCVTTPD2DQZrr,
VCVTTPD2UDQZrr,
VCVTTPS2QQZrr,
VCVTTPS2UQQZrr,
VCVTUDQ2PDZrr,
VCVTUQQ2PSZrr)>;
// Group95: 2 micro-ops, one each on SKXPort23 and SKXPort015, latency 7.
// VPBLENDDrmi is assigned the write alone; the regex-matched 128-bit "rm"
// opcodes below are assigned the write together with ReadAfterVecXLd.
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup95], (instrs VPBLENDDrmi)>;
def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
(instregex "VBLENDMPDZ128rm(b?)",
"VBLENDMPSZ128rm(b?)",
"VBROADCASTI32X2Z128rm(b?)",
"VBROADCASTSSZ128rm(b?)",
"VINSERT(F|I)128rmi",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
"VMOVDDUPZ128rm(b?)",
"VMOVDQA32Z128rm(b?)",
"VMOVDQA64Z128rm(b?)",
"VMOVDQU16Z128rm(b?)",
"VMOVDQU32Z128rm(b?)",
"VMOVDQU64Z128rm(b?)",
"VMOVDQU8Z128rm(b?)",
"VMOVSHDUPZ128rm(b?)",
"VMOVSLDUPZ128rm(b?)",
"VMOVUPDZ128rm(b?)",
"VMOVUPSZ128rm(b?)",
"VPADD(B|D|Q|W)Z128rm(b?)",
"(V?)PADD(B|D|Q|W)rm",
"VPBLENDM(B|D|Q|W)Z128rm(b?)",
"VPBROADCASTDZ128rm(b?)",
"VPBROADCASTQZ128rm(b?)",
"VPSUB(B|D|Q|W)Z128rm(b?)",
"(V?)PSUB(B|D|Q|W)rm",
"VPTERNLOGDZ128rm(b?)i",
"VPTERNLOGQZ128rm(b?)i")>;
// Group96: 3 micro-ops (2 on SKXPort5, 1 on SKXPort23), latency 7.
// Assigned to the MMX pack "rm" forms (PACKSSDW, PACKSSWB, PACKUSWB).
def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm,
MMX_PACKSSWBrm,
MMX_PACKUSWBrm)>;
// Group97: 3 micro-ops (2 on SKXPort5, 1 on SKXPort015), latency 7.
// Assigned to the VPERMI2W / VPERMT2W register forms (Z128, Z256, Z).
def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2WZ128rr",
"VPERMI2WZ256rr",
"VPERMI2WZrr",
"VPERMT2WZ128rr",
"VPERMT2WZ256rr",
"VPERMT2WZrr")>;
// Group99: 3 micro-ops (1 on SKXPort23, 2 on SKXPort0156), latency 7.
// Assigned to LEAVE, LEAVE64 and SCAS{B,L,Q,W}.
def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;
// Group100: 3 micro-ops, one each on SKXPort0, SKXPort5 and SKXPort01,
// latency 7. Assigned to the SS-to-64-bit-integer conversions (CVT(T)SS2SI64,
// VCVT(T)SS2USI64Z) in register form.
def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort01]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
"VCVT(T?)SS2USI64Zrr")>;
// Group101: 3 micro-ops, one each on SKXPort0, SKXPort23 and SKXPort05,
// latency 7. Assigned to FLDCW16m.
def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup101], (instrs FLDCW16m)>;
// Group103: 3 micro-ops, one each on SKXPort5, SKXPort23 and SKXPort0156,
// latency 7. Assigned to the KMOV*km forms.
def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
// Group104: 3 micro-ops, one each on SKXPort6, SKXPort23 and SKXPort0156,
// latency 7. Assigned to LRET64 and RET64.
def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>;
// Group106: 4 micro-ops split 1/2/1 across SKXPort4, SKXPort5 and SKXPort237,
// latency 7. Assigned to the VCOMPRESSP* / VPCOMPRESS* "mr" forms.
def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 7;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
"VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
"VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
"VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;
// Group107: 5 micro-ops split 1/1/1/2 across SKXPort4, SKXPort23, SKXPort237
// and SKXPort06, latency 7. Assigned to ROL/ROR memory forms with a count of 1
// or an immediate ("m1" / "mi").
def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 7;
let NumMicroOps = 5;
let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
// Group107_1: 2 micro-ops, both on SKXPort06, latency 2.
// Assigned to the rotate-by-1 register forms (ROL*r1, ROR*r1).
def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
// Group108: 5 micro-ops split 1/1/1/2 across SKXPort4, SKXPort23, SKXPort237
// and SKXPort0156, latency 7. Assigned to XADD(8|16|32|64)rm.
def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
// Group109: 5 micro-ops, one each on SKXPort4, SKXPort6, SKXPort23, SKXPort237
// and SKXPort0156, latency 7. Assigned to CALL(16|32|64)m and FARCALL64m.
def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;
// Group110: 7 micro-ops split 1/2/2/2 across SKXPort0, SKXPort4, SKXPort237
// and SKXPort0156, latency 7. Assigned to the Z128 scatters with 64-bit
// elements (VPSCATTERDQ/QQ, VSCATTERDPD/QPD).
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
let ReleaseAtCycles = [1,2,2,2];
}
def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
VPSCATTERQQZ128mr,
VSCATTERDPDZ128mr,
VSCATTERQPDZ128mr)>;
// Group111: 7 micro-ops split 1/3/1/2 across SKXPort6, SKXPort06, SKXPort15
// and SKXPort0156, latency 7. Assigned to LOOP.
def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
let ReleaseAtCycles = [1,3,1,2];
}
def: InstRW<[SKXWriteResGroup111], (instrs LOOP)>;
// Group112: 11 micro-ops split 1/4/4/2 across SKXPort0, SKXPort4, SKXPort237
// and SKXPort0156, latency 7. Assigned to the Z256 scatters with 64-bit
// elements (VPSCATTERDQ/QQ, VSCATTERDPD/QPD).
def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 11;
let ReleaseAtCycles = [1,4,4,2];
}
def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
VPSCATTERQQZ256mr,
VSCATTERDPDZ256mr,
VSCATTERQPDZ256mr)>;
// Group113: 19 micro-ops split 1/8/8/2 across SKXPort0, SKXPort4, SKXPort237
// and SKXPort0156, latency 7. Assigned to the listed "Zmr" scatter opcodes.
def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 19;
let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
VPSCATTERQDZmr,
VPSCATTERQQZmr,
VSCATTERDPDZmr,
VSCATTERQPSZmr,
VSCATTERQPDZmr)>;
// Group114: 36 micro-ops split 1/16/1/16/2 across SKXPort0, SKXPort4,
// SKXPort5, SKXPort237 and SKXPort0156, latency 7. Assigned to VSCATTERDPSZmr.
def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 36;
let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
// Group118: 2 micro-ops, one each on SKXPort1 and SKXPort23, latency 8.
// Assigned to PDEP(32|64)rm and PEXT(32|64)rm.
def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
// Group119: 2 micro-ops, one each on SKXPort5 and SKXPort23, latency 8.
// Assigned to FCOM/FCOMP memory forms, the VPBROADCASTB/W Y, Z and Z256 "rm"
// forms, and VPMOVSXBD/BQ/WQ Yrm.
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VPBROADCASTB(Z|Z256)rm(b?)",
"VPBROADCASTW(Z|Z256)rm(b?)")>;
def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
VPBROADCASTWYrm,
VPMOVSXBDYrm,
VPMOVSXBQYrm,
VPMOVSXWQYrm)>;
// Group121: 2 micro-ops, one each on SKXPort23 and SKXPort015, latency 8.
// VPBLENDDYrmi is assigned the write alone; the regex-matched Y / Z / Z256
// "rm" opcodes below are assigned the write together with ReadAfterVecYLd.
// The resources match SKXWriteResGroup95; the latency is 8 here versus 7 there.
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup121], (instrs VPBLENDDYrmi)>;
def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
(instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VBLENDMPS(Z|Z256)rm(b?)",
"VBROADCASTF32X2Z256rm(b?)",
"VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
"VBROADCASTF32X4Zrm(b?)",
"VBROADCASTF32X8Zrm(b?)",
"VBROADCASTF64X2Z256rm(b?)",
"VBROADCASTF64X2Zrm(b?)",
"VBROADCASTF64X4Zrm(b?)",
"VBROADCASTI32X2Z256rm(b?)",
"VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
"VBROADCASTI32X4Zrm(b?)",
"VBROADCASTI32X8Zrm(b?)",
"VBROADCASTI64X2Z256rm(b?)",
"VBROADCASTI64X2Zrm(b?)",
"VBROADCASTI64X4Zrm(b?)",
"VBROADCASTSD(Z|Z256)rm(b?)",
"VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32X4(Z|Z256)rm(b?)",
"VINSERTF32X8Zrm(b?)",
"VINSERTF64X2(Z|Z256)rm(b?)",
"VINSERTF64X4Zrm(b?)",
"VINSERTI32X4(Z|Z256)rm(b?)",
"VINSERTI32X8Zrm(b?)",
"VINSERTI64X2(Z|Z256)rm(b?)",
"VINSERTI64X4Zrm(b?)",
"VMOVAPD(Z|Z256)rm(b?)",
"VMOVAPS(Z|Z256)rm(b?)",
"VMOVDDUP(Z|Z256)rm(b?)",
"VMOVDQA32(Z|Z256)rm(b?)",
"VMOVDQA64(Z|Z256)rm(b?)",
"VMOVDQU16(Z|Z256)rm(b?)",
"VMOVDQU32(Z|Z256)rm(b?)",
"VMOVDQU64(Z|Z256)rm(b?)",
"VMOVDQU8(Z|Z256)rm(b?)",
"VMOVSHDUP(Z|Z256)rm(b?)",
"VMOVSLDUP(Z|Z256)rm(b?)",
"VMOVUPD(Z|Z256)rm(b?)",
"VMOVUPS(Z|Z256)rm(b?)",
"VPADD(B|D|Q|W)Yrm",
"VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBROADCASTD(Z|Z256)rm(b?)",
"VPBROADCASTQ(Z|Z256)rm(b?)",
"VPSUB(B|D|Q|W)Yrm",
"VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
"VPTERNLOGD(Z|Z256)rm(b?)i",
"VPTERNLOGQ(Z|Z256)rm(b?)i")>;
// Group123: 4 micro-ops split 1/2/1 across SKXPort0, SKXPort5 and SKXPort23,
// latency 8. Assigned to MMX_PHADDSWrm and MMX_PHSUBSWrm.
def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
// Group127: 5 micro-ops split 1/1/1/2 across SKXPort23, SKXPort237, SKXPort06
// and SKXPort0156, latency 8. Assigned to RCL/RCR memory forms with a count of
// 1 or an immediate ("m1" / "mi").
// NOTE(review): unlike the other read-modify-write shift/rotate groups in this
// section, SKXPort4 is not in the resource list - confirm this is intended.
def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
// Group128: 6 micro-ops split 1/1/1/3 across SKXPort4, SKXPort23, SKXPort237
// and SKXPort06, latency 8. Assigned to the ROL/ROR/SAR/SHL/SHR "mCL" memory
// forms.
def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
let NumMicroOps = 6;
let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
"ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
// Group130: 6 micro-ops split 1/1/1/2/1 across SKXPort4, SKXPort23,
// SKXPort237, SKXPort06 and SKXPort0156, latency 8.
// Unlike the surrounding groups this one is attached through a SchedAlias for
// WriteADCRMW rather than through an InstRW instruction list.
def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
let ReleaseAtCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;
// Group131: 8 micro-ops split 1/2/1/2/2 across SKXPort0, SKXPort4, SKXPort5,
// SKXPort237 and SKXPort0156, latency 8. Assigned to the Z128 and Z256 forms
// of VPSCATTERQD and VSCATTERQPS.
def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 8;
let ReleaseAtCycles = [1,2,1,2,2];
}
def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
VPSCATTERQDZ256mr,
VSCATTERQPSZ128mr,
VSCATTERQPSZ256mr)>;
// Group132: 12 micro-ops split 1/4/1/4/2 across SKXPort0, SKXPort4, SKXPort5,
// SKXPort237 and SKXPort0156, latency 8. Assigned to VPSCATTERDDZ128mr and
// VSCATTERDPSZ128mr.
def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 12;
let ReleaseAtCycles = [1,4,1,4,2];
}
def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
VSCATTERDPSZ128mr)>;
// Group133: 20 micro-ops split 1/8/1/8/2 across SKXPort0, SKXPort4, SKXPort5,
// SKXPort237 and SKXPort0156, latency 8. Assigned to VPSCATTERDDZ256mr and
// VSCATTERDPSZ256mr.
def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 20;
let ReleaseAtCycles = [1,8,1,8,2];
}
def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
VSCATTERDPSZ256mr)>;
// Group134: 36 micro-ops split 1/16/1/16/2 across SKXPort0, SKXPort4,
// SKXPort5, SKXPort237 and SKXPort0156, latency 8. Assigned to VPSCATTERDDZmr.
def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 36;
let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
// Group135: 2 micro-ops, one each on SKXPort0 and SKXPort23, latency 9.
// Assigned to MMX_CVTPI2PSrm.
def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
// Group136: 2 micro-ops, one each on SKXPort5 and SKXPort23, latency 9.
// Assigned to the listed VPMOVSX/VPMOVZX Yrm opcodes and to the memory forms of
// VALIGN Z128, scalar VFPCLASS, (V)PCMPGTQ, VPERMI2/VPERMT2 Z128 and
// VPMAX/VPMIN (S|U)Q Z128.
def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
VPMOVSXDQYrm,
VPMOVSXWDYrm,
VPMOVZXWDYrm)>;
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VFPCLASSSDZm(b?)i",
"VFPCLASSSSZm(b?)i",
"(V?)PCMPGTQrm",
"VPERMI2DZ128rm(b?)",
"VPERMI2PDZ128rm(b?)",
"VPERMI2PSZ128rm(b?)",
"VPERMI2QZ128rm(b?)",
"VPERMT2DZ128rm(b?)",
"VPERMT2PDZ128rm(b?)",
"VPERMT2PSZ128rm(b?)",
"VPERMT2QZ128rm(b?)",
"VPMAXSQZ128rm(b?)",
"VPMAXUQZ128rm(b?)",
"VPMINSQZ128rm(b?)",
"VPMINUQZ128rm(b?)")>;
// Group136_2: same resources as SKXWriteResGroup136 (SKXPort5 + SKXPort23,
// 2 micro-ops) but latency 10. Assigned to the Z128 / scalar-Z memory forms of
// VCMP*, VFPCLASSP*, VPCMP* and VPTEST(N)M*.
def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
"VCMP(SD|SS)Zrm",
"VFPCLASSPDZ128m(b?)i",
"VFPCLASSPSZ128m(b?)i",
"VPCMPBZ128rm(b?)i",
"VPCMPDZ128rm(b?)i",
"VPCMPEQ(B|D|Q|W)Z128rm(b?)",
"VPCMPGT(B|D|Q|W)Z128rm(b?)",
"VPCMPQZ128rm(b?)i",
"VPCMPU(B|D|Q|W)Z128rm(b?)i",
"VPCMPWZ128rm(b?)i",
"VPTESTMBZ128rm(b?)",
"VPTESTMDZ128rm(b?)",
"VPTESTMQZ128rm(b?)",
"VPTESTMWZ128rm(b?)",
"VPTESTNMBZ128rm(b?)",
"VPTESTNMDZ128rm(b?)",
"VPTESTNMQZ128rm(b?)",
"VPTESTNMWZ128rm(b?)")>;
// Group137: 2 micro-ops, one each on SKXPort23 and SKXPort01, latency 9.
// Assigned to MMX_CVT(T)PS2PIrm and (V)CVTPS2PDrm.
def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 9;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
// Group143: 4 micro-ops split 2/1/1 across SKXPort5, SKXPort01 and SKXPort23,
// latency 9. Assigned to (V)PHADDSWrm and (V)PHSUBSWrm.
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
// Group146: 5 micro-ops split 1/2/1/1 across SKXPort1, SKXPort6, SKXPort23 and
// SKXPort0156, latency 9. Assigned to LAR(16|32|64)rm and LSL(16|32|64)rm.
def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
// Group148: 2 micro-ops, one each on SKXPort5 and SKXPort23, latency 10.
// Assigned to VPCMPGTQYrm, the x87 ADD/SUB/SUBR and ILD memory forms, and the
// Z / Z256 memory forms of VALIGND/Q and VPMAX/VPMIN (S|U)Q.
def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VALIGND(Z|Z256)rm(b?)i",
"VALIGNQ(Z|Z256)rm(b?)i",
"VPMAXSQ(Z|Z256)rm(b?)",
"VPMAXUQ(Z|Z256)rm(b?)",
"VPMINSQ(Z|Z256)rm(b?)",
"VPMINUQ(Z|Z256)rm(b?)")>;
// Group148_2: same resources as SKXWriteResGroup148 (SKXPort5 + SKXPort23,
// 2 micro-ops) but latency 11. Assigned to the Z / Z256 memory forms of
// VCMPP*, VFPCLASSP*, VPCMP* and VPTEST(N)M*.
def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
"VCMPPS(Z|Z256)rm(b?)i",
"VFPCLASSPD(Z|Z256)m(b?)i",
"VFPCLASSPS(Z|Z256)m(b?)i",
"VPCMPB(Z|Z256)rm(b?)i",
"VPCMPD(Z|Z256)rm(b?)i",
"VPCMPEQB(Z|Z256)rm(b?)",
"VPCMPEQD(Z|Z256)rm(b?)",
"VPCMPEQQ(Z|Z256)rm(b?)",
"VPCMPEQW(Z|Z256)rm(b?)",
"VPCMPGTB(Z|Z256)rm(b?)",
"VPCMPGTD(Z|Z256)rm(b?)",
"VPCMPGTQ(Z|Z256)rm(b?)",
"VPCMPGTW(Z|Z256)rm(b?)",
"VPCMPQ(Z|Z256)rm(b?)i",
"VPCMPU(B|D|Q|W)Z256rm(b?)i",
"VPCMPU(B|D|Q|W)Zrm(b?)i",
"VPCMPW(Z|Z256)rm(b?)i",
"VPTESTM(B|D|Q|W)Z256rm(b?)",
"VPTESTM(B|D|Q|W)Zrm(b?)",
"VPTESTNM(B|D|Q|W)Z256rm(b?)",
"VPTESTNM(B|D|Q|W)Zrm(b?)")>;
// Latency 10, 2 micro-ops; resource cycles: SKXPort23 x1, SKXPort01 x1.
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 10;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
// Bound to rm forms of conversion instructions: the Z128 VCVT* variants plus
// several SSE/AVX ((V?)CVT*) and scalar (VCVTSS2SDZrm) ones.
def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)",
"(V?)CVTDQ2PSrm",
"VCVTPD2QQZ128rm(b?)",
"VCVTPD2UQQZ128rm(b?)",
"VCVTPH2PSZ128rm(b?)",
"VCVTPS2DQZ128rm(b?)",
"(V?)CVTPS2DQrm",
"VCVTPS2PDZ128rm(b?)",
"VCVTPS2QQZ128rm(b?)",
"VCVTPS2UDQZ128rm(b?)",
"VCVTPS2UQQZ128rm(b?)",
"VCVTQQ2PDZ128rm(b?)",
"VCVTQQ2PSZ128rm(b?)",
"VCVTSS2SDZrm",
"(V?)CVTSS2SDrm",
"VCVTTPD2QQZ128rm(b?)",
"VCVTTPD2UQQZ128rm(b?)",
"VCVTTPS2DQZ128rm(b?)",
"(V?)CVTTPS2DQrm",
"VCVTTPS2QQZ128rm(b?)",
"VCVTTPS2UDQZ128rm(b?)",
"VCVTTPS2UQQZ128rm(b?)",
"VCVTUDQ2PDZ128rm(b?)",
"VCVTUDQ2PSZ128rm(b?)",
"VCVTUQQ2PDZ128rm(b?)",
"VCVTUQQ2PSZ128rm(b?)")>;
// Latency 10, 3 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1.
def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
"VEXPANDPSZ128rm(b?)",
"VPEXPANDDZ128rm(b?)",
"VPEXPANDQZ128rm(b?)")>;
// Latency 10, 4 micro-ops; resource cycles: SKXPort5 x2, SKXPort01 x1,
// SKXPort23 x1.
def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
// Latency 10, 8 micro-ops; resource cycles: SKXPort0156 x3 and one cycle on
// each of SKXPort4, SKXPort6, SKXPort23, SKXPort237, SKXPort06.
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
let ReleaseAtCycles = [1,1,1,1,1,3];
}
def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
// Latency 11, 2 micro-ops; resource cycles: SKXPort0 x1, SKXPort23 x1.
def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
// Latency 11, 2 micro-ops; resource cycles: SKXPort23 x1, SKXPort01 x1.
def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 11;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
// Two bindings share this write: exact Y-suffixed opcodes, then a regex list
// of Z/Z256 conversion rm forms.
def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
VCVTPS2PDYrm)>;
def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTPH2PS(Z|Z256)rm(b?)",
"VCVTPS2PD(Z|Z256)rm(b?)",
"VCVTQQ2PD(Z|Z256)rm(b?)",
"VCVTQQ2PSZ256rm(b?)",
"VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
"VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
"VCVT(T?)PS2DQYrm",
"VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
"VCVT(T?)PS2QQZ256rm(b?)",
"VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
"VCVT(T?)PS2UQQZ256rm(b?)",
"VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTUQQ2PD(Z|Z256)rm(b?)",
"VCVTUQQ2PSZ256rm(b?)")>;
// Latency 11, 3 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1.
def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
"VEXPANDPD(Z|Z256)rm(b?)",
"VEXPANDPS(Z|Z256)rm(b?)",
"VPEXPANDD(Z|Z256)rm(b?)",
"VPEXPANDQ(Z|Z256)rm(b?)")>;
// Latency 11, 3 micro-ops; one cycle each on SKXPort0, SKXPort5, SKXPort23.
def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
// Latency 11, 3 micro-ops; one cycle each on SKXPort5, SKXPort23, SKXPort01.
def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 11;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm,
CVTTPD2DQrm,
MMX_CVTPD2PIrm,
MMX_CVTTPD2PIrm)>;
// Latency 11, 4 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1,
// SKXPort015 x1.
def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 11;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
// Latency 11, 7 micro-ops; resource cycles: SKXPort1 x2, SKXPort06 x3,
// SKXPort0156 x2.
def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
"RCR(16|32|64)rCL")>;
// Latency 11, 9 micro-ops; resource cycles: SKXPort1 x1, SKXPort06 x5,
// SKXPort15 x1, SKXPort0156 x2. Only the 8-bit RCL-by-CL form uses this class;
// RCR8rCL has its own class (SKXWriteResGroup190).
def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 9;
let ReleaseAtCycles = [1,5,1,2];
}
def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;
// Latency 11, 11 micro-ops; resource cycles: SKXPort06 x2, SKXPort0156 x9.
def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 11;
let ReleaseAtCycles = [2,9];
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
// Latency 15, 3 micro-ops; SKXPort01 held for 3 cycles.
// Used for the Z128/Z256 register-register VPMULLQ forms.
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
let Latency = 15;
let NumMicroOps = 3;
let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
// Same latency and micro-op count as SKXWriteResGroup174, but the three cycles
// are charged to SKXPort05 instead of SKXPort01. Used for VPMULLQZrr.
def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
let Latency = 15;
let NumMicroOps = 3;
let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>;
// Latency 12, 3 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1.
def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 12;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
// Latency 12, 3 micro-ops; one cycle each on SKXPort0, SKXPort23, SKXPort01.
def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
"VCVT(T?)SS2USI64Zrm(b?)")>;
// Latency 12, 3 micro-ops; one cycle each on SKXPort5, SKXPort23, SKXPort01.
def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
"VCVT(T?)PS2UQQZrm(b?)")>;
// Latency 13, 3 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1.
def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
"VPERMWZ256rm(b?)",
"VPERMWZrm(b?)")>;
// Latency 13, 3 micro-ops; one cycle each on SKXPort0, SKXPort5, SKXPort23.
def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
// Latency 13, 4 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1,
// SKXPort015 x1.
def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 13;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)",
"VPERMT2WZ128rm(b?)")>;
// Latency 14, 3 micro-ops; one cycle each on SKXPort0, SKXPort5, SKXPort23.
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
// Latency 14, 3 micro-ops; one cycle each on SKXPort5, SKXPort23, SKXPort01.
def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 14;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
"VCVTPD2UDQZrm(b?)",
"VCVTQQ2PSZrm(b?)",
"VCVTTPD2DQZrm(b?)",
"VCVTTPD2UDQZrm(b?)",
"VCVTUQQ2PSZrm(b?)")>;
// Latency 14, 4 micro-ops; resource cycles: SKXPort5 x2, SKXPort23 x1,
// SKXPort015 x1.
def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 14;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)",
"VPERMI2WZrm(b?)",
"VPERMT2WZ256rm(b?)",
"VPERMT2WZrm(b?)")>;
// Latency 14, 10 micro-ops; resource cycles: SKXPort1 x2, SKXPort06 x4,
// SKXPort15 x1, SKXPort0156 x3. Only the 8-bit RCR-by-CL form uses this class.
def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
let ReleaseAtCycles = [2,4,1,3];
}
def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;
// Latency 15, 1 micro-op; SKXPort0 for one cycle.
def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
let Latency = 15;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
// Latency 15, 8 micro-ops; resource cycles: SKXPort1 x1, SKXPort5 x2,
// SKXPort01 x2, SKXPort23 x1, SKXPort015 x2.
def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 8;
let ReleaseAtCycles = [1,2,2,1,2];
}
def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
// Latency 15, 10 micro-ops; resource cycles: SKXPort06 x5 and one cycle on
// each of SKXPort1, SKXPort23, SKXPort237, SKXPort15, SKXPort0156.
def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
let ReleaseAtCycles = [1,1,1,5,1,1];
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
// Latency 16, 14 micro-ops; resource cycles: SKXPort4 x1, SKXPort23 x1,
// SKXPort237 x1, SKXPort06 x4, SKXPort15 x2, SKXPort0156 x5.
def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;
// VZEROALL: latency 12, 34 micro-ops; resource cycles: SKXPort1 x1,
// SKXPort05 x4, SKXPort6 x5.
// NOTE(review): NumMicroOps (34) is larger than the summed resource cycles
// (10) - presumably the remaining micro-ops are not charged to a port; confirm.
def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1,SKXPort05,SKXPort6]> {
let Latency = 12;
let NumMicroOps = 34;
let ReleaseAtCycles = [1,4,5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;
// Latency 17, 15 micro-ops; resource cycles: SKXPort0 x2, SKXPort1 x1,
// SKXPort5 x2, SKXPort6 x4, SKXPort05 x2, SKXPort0156 x4.
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
// Latency 21, 4 micro-ops; resource cycles: SKXPort23 x1, SKXPort01 x3.
def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 21;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
// Latency 18, 8 micro-ops; resource cycles: SKXPort5 x1, SKXPort6 x1,
// SKXPort06 x1, SKXPort0156 x5.
def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[SKXWriteResGroup207], (instrs CPUID, RDTSC)>;
// Latency 18, 11 micro-ops; resource cycles: SKXPort1 x2, SKXPort23 x1,
// SKXPort237 x1, SKXPort06 x4, SKXPort15 x1, SKXPort0156 x2.
def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 18;
let NumMicroOps = 11;
let ReleaseAtCycles = [2,1,1,4,1,2];
}
def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
// Latency 22, 4 micro-ops; resource cycles: SKXPort23 x1, SKXPort01 x3.
def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 22;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
// Same cost as SKXWriteResGroup211, but the three cycles go to SKXPort05
// instead of SKXPort01. Used for VPMULLQZrm.
def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
let Latency = 22;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
// Latency 20, 1 micro-op; SKXPort0 for one cycle.
def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
let Latency = 20;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
// Gather classes for the Z/Z128/Z256 gather opcodes. The four classes use the
// same ports and micro-op count; they differ only in latency (17/19/21/25) and
// in how many cycles SKXPort23 is held (2/4/8/16).
def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 17;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
VGATHERDPDZ128rm, VPGATHERDQZ128rm,
VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
// SKXPort23 held for 4 cycles.
def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 19;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
VGATHERQPDZ256rm, VPGATHERQQZ256rm,
VGATHERDPSZ128rm, VPGATHERDDZ128rm,
VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
// SKXPort23 held for 8 cycles.
def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 21;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
VGATHERDPDZrm, VPGATHERDQZrm,
VGATHERQPDZrm, VPGATHERQQZrm,
VGATHERQPSZrm, VPGATHERQDZrm)>;
// SKXPort23 held for 16 cycles.
def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 25;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,16,1,1];
}
def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
// Latency 20, 8 micro-ops; resource cycles: SKXPort0156 x2 and one cycle on
// each of SKXPort4, SKXPort5, SKXPort6, SKXPort23, SKXPort237, SKXPort06.
def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 20;
let NumMicroOps = 8;
let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[SKXWriteResGroup219], (instrs INSB, INSL, INSW)>;
// Latency 20, 10 micro-ops; resource cycles: SKXPort5 x1, SKXPort6 x2,
// SKXPort0156 x7.
def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> {
let Latency = 20;
let NumMicroOps = 10;
let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>;
// Latency 22, 2 micro-ops; resource cycles: SKXPort0 x1, SKXPort23 x1.
def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 22;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
// Gather classes for the non-Z gather opcodes. The three classes use the same
// ports and micro-op count; they differ only in latency (18/20/22) and in how
// many cycles SKXPort23 is held (2/4/8).
def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXPort015]> {
let Latency = 18;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm,
VGATHERQPSrm, VPGATHERQDrm)>;
// SKXPort23 held for 4 cycles.
def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXPort015]> {
let Latency = 20;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERDPSrm, VPGATHERDDrm,
VGATHERQPDYrm, VPGATHERQQYrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;
// SKXPort23 held for 8 cycles.
def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXPort015]> {
let Latency = 22;
let NumMicroOps = 5; // 2 uops perform multiple loads
let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
// Latency 22, 14 micro-ops; resource cycles: SKXPort5 x5, SKXPort01 x5,
// SKXPort015 x4.
def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 22;
let NumMicroOps = 14;
let ReleaseAtCycles = [5,5,4];
}
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
"VPCONFLICTQZ256rr")>;
// Latency 23, 19 micro-ops; resource cycles: SKXPort0 x2, SKXPort4 x1,
// SKXPort5 x4, SKXPort23 x1, SKXPort237 x1, SKXPort06 x4, SKXPort0156 x6.
def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[SKXWriteResGroup228], (instrs CMPXCHG16B)>;
// Latency 25, 3 micro-ops; one cycle each on SKXPort0, SKXPort5, SKXPort23.
def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 25;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
// Latency 27, 2 micro-ops; resource cycles: SKXPort0 x1, SKXPort23 x1.
def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 27;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
// Latency 29, 15 micro-ops; resource cycles: SKXPort5 x5, SKXPort01 x5,
// SKXPort23 x1, SKXPort015 x4.
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
let ReleaseAtCycles = [5,5,1,4];
}
def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
// Latency 30, 3 micro-ops; one cycle each on SKXPort0, SKXPort5, SKXPort23.
def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 30;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
// Latency 35, 23 micro-ops; resource cycles: SKXPort5 x1, SKXPort6 x5,
// SKXPort23 x3, SKXPort06 x4, SKXPort0156 x10.
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
// Latency 35, 23 micro-ops; resource cycles: SKXPort5 x1, SKXPort6 x5,
// SKXPort23 x2, SKXPort237 x1, SKXPort06 x4, SKXPort0156 x10.
def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
// Latency 37, 21 micro-ops; resource cycles: SKXPort5 x9, SKXPort01 x7,
// SKXPort015 x5.
def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 37;
let NumMicroOps = 21;
let ReleaseAtCycles = [9,7,5];
}
def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
"VPCONFLICTQZrr")>;
// Latency 37, 31 micro-ops; resource cycles: SKXPort1 x1, SKXPort6 x8,
// SKXPort23 x1, SKXPort0156 x21.
def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 37;
let NumMicroOps = 31;
let ReleaseAtCycles = [1,8,1,21];
}
def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>;
// Latency 40, 18 micro-ops; resource cycles: SKXPort1 x1, SKXPort4 x1,
// SKXPort5 x2, SKXPort6 x3, SKXPort23 x1, SKXPort237 x1, SKXPort15 x1,
// SKXPort0156 x8.
def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> {
let Latency = 40;
let NumMicroOps = 18;
let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[SKXWriteResGroup252], (instrs VMCLEARm)>;
// Latency 41, 39 micro-ops; resource cycles: SKXPort4 x1, SKXPort6 x10,
// SKXPort23 x1, SKXPort237 x1, SKXPort0156 x26.
def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 41;
let NumMicroOps = 39;
let ReleaseAtCycles = [1,10,1,1,26];
}
def: InstRW<[SKXWriteResGroup253], (instrs XSAVE64)>;
// Latency 42, 22 micro-ops; resource cycles: SKXPort5 x2, SKXPort0156 x20.
def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
let ReleaseAtCycles = [2,20];
}
def: InstRW<[SKXWriteResGroup254], (instrs RDTSCP)>;
// Latency 42, 40 micro-ops; same ports as SKXWriteResGroup253 with one more
// cycle on SKXPort6 (11 instead of 10).
def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 42;
let NumMicroOps = 40;
let ReleaseAtCycles = [1,11,1,1,26];
}
// Two bindings share this write: XSAVE by exact opcode, the other XSAVE*
// flavours by regex.
def: InstRW<[SKXWriteResGroup255], (instrs XSAVE)>;
def: InstRW<[SKXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
// Latency 44, 22 micro-ops; resource cycles: SKXPort5 x9, SKXPort01 x7,
// SKXPort23 x1, SKXPort015 x5.
def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 44;
let NumMicroOps = 22;
let ReleaseAtCycles = [9,7,1,5];
}
def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
"VPCONFLICTQZrm(b?)")>;
// Latency 62, 64 micro-ops; resource cycles: SKXPort0 x2, SKXPort23 x8,
// SKXPort05 x5, SKXPort06 x10, SKXPort0156 x39.
def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> {
let Latency = 62;
let NumMicroOps = 64;
let ReleaseAtCycles = [2,8,5,10,39];
}
def: InstRW<[SKXWriteResGroup258], (instrs FLDENVm)>;
// Latency 63, 88 micro-ops; resource cycles: SKXPort0 x4, SKXPort6 x4,
// SKXPort23 x31, SKXPort05 x1, SKXPort06 x2, SKXPort15 x1, SKXPort0156 x45.
def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 63;
let NumMicroOps = 88;
let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[SKXWriteResGroup259], (instrs FXRSTOR64)>;
// Latency 63, 90 micro-ops; resource cycles: SKXPort0 x4, SKXPort6 x2,
// SKXPort23 x33, SKXPort05 x1, SKXPort06 x2, SKXPort15 x1, SKXPort0156 x47.
def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 63;
let NumMicroOps = 90;
let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[SKXWriteResGroup260], (instrs FXRSTOR)>;
// Latency 67, 35 micro-ops; resource cycles: SKXPort5 x17, SKXPort01 x11,
// SKXPort015 x7.
def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 67;
let NumMicroOps = 35;
let ReleaseAtCycles = [17,11,7];
}
def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;
// Latency 74, 36 micro-ops; same as SKXWriteResGroup261 plus one cycle on
// SKXPort23.
def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 74;
let NumMicroOps = 36;
let ReleaseAtCycles = [17,11,1,7];
}
def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;
// Latency 75, 15 micro-ops; resource cycles: SKXPort5 x6, SKXPort05 x3,
// SKXPort0156 x6.
def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
let Latency = 75;
let NumMicroOps = 15;
let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;
// Latency 106, 100 micro-ops; resource cycles: SKXPort0 x9, SKXPort1 x1,
// SKXPort4 x11, SKXPort5 x16, SKXPort6 x1, SKXPort237 x11, SKXPort06 x21,
// SKXPort0156 x30.
def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
}
def: InstRW<[SKXWriteResGroup266], (instrs FSTENVm)>;
// Latency 140, 4 micro-ops; resource cycles: SKXPort6 x1, SKXPort0156 x3.
def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 140;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
// CLC is bound to the generic WriteZero class rather than a port-specific one.
def: InstRW<[WriteZero], (instrs CLC)>;
// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
// These can be investigated with llvm-exegesis, e.g.
// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// Write class with an empty resource list and zero latency; it is the
// zero-idiom alternative in every SchedWriteVariant below.
def SKXWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
// GPR SUB/XOR: SKXWriteZeroLatency when ZeroIdiomPredicate matches, otherwise
// the generic WriteALU class.
def SKXWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
// FP XOR, non-Y and Z128 forms: zero-latency when ZeroIdiomPredicate matches,
// otherwise WriteFLogic.
def SKXWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteFLogic]>
]>;
def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
XORPDrr, VXORPDrr,
VXORPSZ128rr,
VXORPDZ128rr)>;
// FP XOR, Y and Z256 forms: falls back to WriteFLogicY.
def SKXWriteFZeroIdiomY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
VXORPSZ256rr, VXORPDZ256rr)>;
// FP XOR, Z forms: falls back to WriteFLogicZ.
def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteFLogicZ]>
]>;
def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
// Integer vector XOR, non-Y and Z128 forms: zero-latency when
// ZeroIdiomPredicate matches, otherwise WriteVecLogicX.
def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
VPXORDZ128rr, VPXORQZ128rr)>;
// Integer vector XOR, Y and Z256 forms: falls back to WriteVecLogicY.
def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
VPXORDZ256rr, VPXORQZ256rr)>;
// Integer vector XOR, Z forms: falls back to WriteVecLogicZ.
def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicZ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
// PCMPGT{B,D,W}, non-Y forms: zero-latency when ZeroIdiomPredicate matches,
// otherwise WriteVecALUX. PCMPGTQ has its own variant
// (SKXWriteVZeroIdiomPCMPGTQ).
def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTWrr, VPCMPGTWrr)>;
// PCMPGT{B,D,W}, Y forms: falls back to WriteVecALUY.
def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecALUY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
VPCMPGTDYrr,
VPCMPGTWYrr)>;
// Non-idiom cost of the PSUB family: latency 1, 1 micro-op, one cycle on
// SKXPort015.
def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
// PSUB{B,D,Q,W} at every width listed below: zero-latency when
// ZeroIdiomPredicate matches, otherwise SKXWritePSUB. A single fallback class
// is shared by all widths here.
def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [SKXWritePSUB]>
]>;
def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
VPSUBBYrr, VPSUBBZ256rr,
VPSUBDYrr, VPSUBDZ256rr,
VPSUBQYrr, VPSUBQZ256rr,
VPSUBWYrr, VPSUBWZ256rr,
VPSUBBZrr,
VPSUBDZrr,
VPSUBQZrr,
VPSUBWZrr)>;
// Non-idiom cost of PCMPGTQ: latency 3, 1 micro-op, one cycle on SKXPort5.
def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
// PCMPGTQ register forms: zero-latency when ZeroIdiomPredicate matches,
// otherwise SKXWritePCMPGTQ.
def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
SchedVar<NoSchedPred, [SKXWritePCMPGTQ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
VPCMPGTQYrr)>;
// CMOVA and CMOVBE read both the Z and C flags and need an extra uop.
// Register form: latency 2, 2 micro-ops, SKXPort06 held for 2 cycles.
def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
// Memory form: latency 7, 3 micro-ops; resource cycles: SKXPort23 x1,
// SKXPort06 x2.
def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> {
let Latency = 7;
let NumMicroOps = 3;
let ReleaseAtCycles = [1,2];
}
// Pick the two-flag class when the predicate matches; every other condition
// code keeps the generic WriteCMOV cost.
def SKXCMOVA_CMOVBErr : SchedWriteVariant<[
SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>,
SchedVar<NoSchedPred, [WriteCMOV]>
]>;
// Same selection for the memory form, falling back to WriteCMOV.Folded.
def SKXCMOVA_CMOVBErm : SchedWriteVariant<[
SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>,
SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;
def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETA and SETBE read both the Z and C flags and need an extra uop.
// Register form: latency 2, 2 micro-ops, SKXPort06 held for 2 cycles.
def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
}
// Memory form: latency 3, 4 micro-ops; resource cycles: SKXPort4 x1,
// SKXPort237 x1, SKXPort06 x2.
def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
let Latency = 3;
let NumMicroOps = 4;
let ReleaseAtCycles = [1,1,2];
}
// Pick the two-flag class when the predicate matches; every other condition
// code keeps the generic WriteSETCC cost.
def SKXSETA_SETBErr : SchedWriteVariant<[
SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>,
SchedVar<NoSchedPred, [WriteSETCC]>
]>;
// Same selection for the memory form, falling back to WriteSETCCStore.
def SKXSETA_SETBErm : SchedWriteVariant<[
SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>,
SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;
///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////
// Lists the opcodes that count as zero idioms for this model when
// ZeroIdiomPredicate holds. Every opcode listed here is also bound to one of
// the zero-idiom SchedWriteVariants in this file, so keep the two in sync.
def : IsZeroIdiomFunction<[
// GPR Zero-idioms.
DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
// SSE Zero-idioms.
DepBreakingClass<[
// fp variants.
XORPSrr, XORPDrr,
// int variants.
PXORrr,
PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
], ZeroIdiomPredicate>,
// AVX Zero-idioms.
DepBreakingClass<[
// xmm fp variants.
VXORPSrr, VXORPDrr,
// xmm int variants.
VPXORrr,
VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
// ymm variants.
VXORPSYrr, VXORPDYrr, VPXORYrr,
VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
// zmm variants, plus the Z128 and Z256 forms (XOR and PSUB only; no
// Z-suffixed PCMPGT opcodes are listed).
VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
], ZeroIdiomPredicate>,
]>;
} // SchedModel