[SelectionDAG] Add space-optimized forms of OPC_EmitNode/OPC_MorphNodeTo (#73502)

If at most one flag is set in EmitNodeInfo (none, Chain, GlueInput or
GlueOutput), then we can encode it implicitly in the opcode to save one byte.

Overall this reduces the llc binary size with all in-tree targets by
about 168K.
This commit is contained in:
Wang Pengcheng 2023-12-12 17:45:32 +08:00 committed by GitHub
parent e3f4fa9834
commit 0d5f1cc4d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 133 additions and 26 deletions

View File

@ -244,11 +244,31 @@ public:
OPC_EmitNode0,
OPC_EmitNode1,
OPC_EmitNode2,
// Space-optimized forms that implicitly encode EmitNodeInfo.
OPC_EmitNode0None,
OPC_EmitNode1None,
OPC_EmitNode2None,
OPC_EmitNode0Chain,
OPC_EmitNode1Chain,
OPC_EmitNode2Chain,
OPC_MorphNodeTo,
// Space-optimized forms that implicitly encode number of result VTs.
OPC_MorphNodeTo0,
OPC_MorphNodeTo1,
OPC_MorphNodeTo2,
// Space-optimized forms that implicitly encode EmitNodeInfo.
OPC_MorphNodeTo0None,
OPC_MorphNodeTo1None,
OPC_MorphNodeTo2None,
OPC_MorphNodeTo0Chain,
OPC_MorphNodeTo1Chain,
OPC_MorphNodeTo2Chain,
OPC_MorphNodeTo0GlueInput,
OPC_MorphNodeTo1GlueInput,
OPC_MorphNodeTo2GlueInput,
OPC_MorphNodeTo0GlueOutput,
OPC_MorphNodeTo1GlueOutput,
OPC_MorphNodeTo2GlueOutput,
OPC_CompleteMatch,
// Contains offset in table for pattern being selected
OPC_Coverage

View File

@ -3757,20 +3757,77 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
case OPC_EmitNode: case OPC_MorphNodeTo:
case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
case OPC_EmitNode:
case OPC_EmitNode0:
case OPC_EmitNode1:
case OPC_EmitNode2:
case OPC_EmitNode0None:
case OPC_EmitNode1None:
case OPC_EmitNode2None:
case OPC_EmitNode0Chain:
case OPC_EmitNode1Chain:
case OPC_EmitNode2Chain:
case OPC_MorphNodeTo:
case OPC_MorphNodeTo0:
case OPC_MorphNodeTo1:
case OPC_MorphNodeTo2:
case OPC_MorphNodeTo0None:
case OPC_MorphNodeTo1None:
case OPC_MorphNodeTo2None:
case OPC_MorphNodeTo0Chain:
case OPC_MorphNodeTo1Chain:
case OPC_MorphNodeTo2Chain:
case OPC_MorphNodeTo0GlueInput:
case OPC_MorphNodeTo1GlueInput:
case OPC_MorphNodeTo2GlueInput:
case OPC_MorphNodeTo0GlueOutput:
case OPC_MorphNodeTo1GlueOutput:
case OPC_MorphNodeTo2GlueOutput: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
unsigned EmitNodeInfo;
if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2Chain) {
if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
EmitNodeInfo = OPFL_Chain;
else
EmitNodeInfo = OPFL_None;
} else if (Opcode >= OPC_MorphNodeTo0None &&
Opcode <= OPC_MorphNodeTo2GlueOutput) {
if (Opcode >= OPC_MorphNodeTo0Chain && Opcode <= OPC_MorphNodeTo2Chain)
EmitNodeInfo = OPFL_Chain;
else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
Opcode <= OPC_MorphNodeTo2GlueInput)
EmitNodeInfo = OPFL_GlueInput;
else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
Opcode <= OPC_MorphNodeTo2GlueOutput)
EmitNodeInfo = OPFL_GlueOutput;
else
EmitNodeInfo = OPFL_None;
} else
EmitNodeInfo = MatcherTable[MatcherIndex++];
// Get the result VT list.
unsigned NumVTs;
// If this is one of the compressed forms, get the number of VTs based
// on the Opcode. Otherwise read the next byte from the table.
if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
NumVTs = Opcode - OPC_MorphNodeTo0;
else if (Opcode >= OPC_MorphNodeTo0None && Opcode <= OPC_MorphNodeTo2None)
NumVTs = Opcode - OPC_MorphNodeTo0None;
else if (Opcode >= OPC_MorphNodeTo0Chain &&
Opcode <= OPC_MorphNodeTo2Chain)
NumVTs = Opcode - OPC_MorphNodeTo0Chain;
else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
Opcode <= OPC_MorphNodeTo2GlueInput)
NumVTs = Opcode - OPC_MorphNodeTo0GlueInput;
else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
Opcode <= OPC_MorphNodeTo2GlueOutput)
NumVTs = Opcode - OPC_MorphNodeTo0GlueOutput;
else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
NumVTs = Opcode - OPC_EmitNode0;
else if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2None)
NumVTs = Opcode - OPC_EmitNode0None;
else if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
NumVTs = Opcode - OPC_EmitNode0Chain;
else
NumVTs = MatcherTable[MatcherIndex++];
SmallVector<EVT, 4> VTs;
@ -3843,8 +3900,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Create the node.
MachineSDNode *Res = nullptr;
bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
(Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
bool IsMorphNodeTo =
Opcode == OPC_MorphNodeTo ||
(Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2GlueOutput);
if (!IsMorphNodeTo) {
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.

View File

@ -77,20 +77,20 @@ def MulIRRPat : Pat<(mul i32:$x, i32:$y), (MulIRR Reg:$x, Reg:$y)>;
// ADD-NEXT: OPC_RecordChild0
// ADD-NEXT: OPC_RecordChild1
// ADD-NEXT: OPC_EmitInteger32, 0
// ADD-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::AddRRI)
// ADD-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::AddRRI)
// ADDINT: SwitchOpcode{{.*}}TARGET_VAL(ISD::INTRINSIC_WO_CHAIN)
// ADDINT-NEXT: OPC_CheckChild0Integer
// ADDINT-NEXT: OPC_RecordChild1
// ADDINT-NEXT: OPC_RecordChild2
// ADDINT-NEXT: OPC_EmitInteger32, 2
// ADDINT-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::AddRRI)
// ADDINT-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::AddRRI)
// SUB: SwitchOpcode{{.*}}TARGET_VAL(ISD::SUB)
// SUB-NEXT: OPC_RecordChild0
// SUB-NEXT: OPC_RecordChild1
// SUB-NEXT: OPC_EmitInteger32, 0
// SUB-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::SubRRI)
// SUB-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::SubRRI)
// MULINT: SwitchOpcode{{.*}}TARGET_VAL(ISD::INTRINSIC_W_CHAIN)
// MULINT-NEXT: OPC_RecordNode
@ -99,10 +99,10 @@ def MulIRRPat : Pat<(mul i32:$x, i32:$y), (MulIRR Reg:$x, Reg:$y)>;
// MULINT-NEXT: OPC_RecordChild3
// MULINT-NEXT: OPC_RecordChild4
// MULINT-NEXT: OPC_EmitMergeInputChains
// MULINT-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::MulRRI)
// MULINT-NEXT: OPC_MorphNodeTo1Chain, TARGET_VAL(::MulRRI)
// MUL: SwitchOpcode{{.*}}TARGET_VAL(ISD::MUL)
// MUL-NEXT: OPC_EmitInteger32, 0
// MUL-NEXT: OPC_RecordChild0
// MUL-NEXT: OPC_RecordChild1
// MUL-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::MulRRI)
// MUL-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::MulRRI)

View File

@ -23,17 +23,17 @@ def GPRAbove127 : RegisterClass<"TestTarget", [i32], 32,
// CHECK: OPC_CheckOpcode, TARGET_VAL(ISD::ADD),
// CHECK-NEXT: OPC_RecordChild0, // #0 = $src
// CHECK-NEXT: OPC_Scope, 13, /*->19*/ // 2 children in Scope
// CHECK-NEXT: OPC_Scope, 12, /*->18*/ // 2 children in Scope
// CHECK-NEXT: OPC_CheckChild1Integer, 0,
// CHECK-NEXT: OPC_EmitInteger32, 0|128,2/*256*/,
// CHECK-NEXT: OPC_MorphNodeTo1, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS), 0,
// CHECK-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS),
// CHECK-NEXT: MVT::i32, 2/*#Ops*/, 1, 0,
def : Pat<(i32 (add i32:$src, (i32 0))),
(COPY_TO_REGCLASS GPRAbove127, GPR0:$src)>;
// CHECK: OPC_CheckChild1Integer, 2,
// CHECK-NEXT: OPC_EmitStringInteger32, TestNamespace::GPR127RegClassID,
// CHECK-NEXT: OPC_MorphNodeTo1, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS), 0,
// CHECK-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS),
// CHECK-NEXT: MVT::i32, 2/*#Ops*/, 1, 0,
def : Pat<(i32 (add i32:$src, (i32 1))),
(COPY_TO_REGCLASS GPR127, GPR0:$src)>;

View File

@ -12,7 +12,7 @@ def REG : Register<"REG">;
def GPR : RegisterClass<"TestTarget", [i32], 32, (add REG)>;
// CHECK-LABEL: OPC_CheckOpcode, TARGET_VAL(ISD::UDIVREM)
// CHECK: OPC_EmitNode2, TARGET_VAL(::INSTR)
// CHECK: OPC_EmitNode2None, TARGET_VAL(::INSTR)
// CHECK: Results = #2 #3
// CHECK: OPC_CompleteMatch, 2, 3, 2
def INSTR : Instruction {

View File

@ -824,21 +824,50 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
}
}
const EmitNodeMatcherCommon *EN = cast<EmitNodeMatcherCommon>(N);
OS << (isa<EmitNodeMatcher>(EN) ? "OPC_EmitNode" : "OPC_MorphNodeTo");
bool IsEmitNode = isa<EmitNodeMatcher>(EN);
OS << (IsEmitNode ? "OPC_EmitNode" : "OPC_MorphNodeTo");
bool CompressVTs = EN->getNumVTs() < 3;
if (CompressVTs)
bool CompressNodeInfo = false;
if (CompressVTs) {
OS << EN->getNumVTs();
if (!EN->hasChain() && !EN->hasInGlue() && !EN->hasOutGlue() &&
!EN->hasMemRefs() && EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "None";
} else if (EN->hasChain() && !EN->hasInGlue() && !EN->hasOutGlue() &&
!EN->hasMemRefs() && EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "Chain";
} else if (!IsEmitNode && !EN->hasChain() && EN->hasInGlue() &&
!EN->hasOutGlue() && !EN->hasMemRefs() &&
EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "GlueInput";
} else if (!IsEmitNode && !EN->hasChain() && !EN->hasInGlue() &&
EN->hasOutGlue() && !EN->hasMemRefs() &&
EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "GlueOutput";
}
}
const CodeGenInstruction &CGI = EN->getInstruction();
OS << ", TARGET_VAL(" << CGI.Namespace << "::" << CGI.TheDef->getName()
<< "), 0";
<< ")";
if (EN->hasChain()) OS << "|OPFL_Chain";
if (EN->hasInGlue()) OS << "|OPFL_GlueInput";
if (EN->hasOutGlue()) OS << "|OPFL_GlueOutput";
if (EN->hasMemRefs()) OS << "|OPFL_MemRefs";
if (EN->getNumFixedArityOperands() != -1)
OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands();
if (!CompressNodeInfo) {
OS << ", 0";
if (EN->hasChain())
OS << "|OPFL_Chain";
if (EN->hasInGlue())
OS << "|OPFL_GlueInput";
if (EN->hasOutGlue())
OS << "|OPFL_GlueOutput";
if (EN->hasMemRefs())
OS << "|OPFL_MemRefs";
if (EN->getNumFixedArityOperands() != -1)
OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands();
}
OS << ",\n";
OS.indent(FullIndexWidth + Indent+4);
@ -881,8 +910,8 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
} else
OS << '\n';
return 5 + !CompressVTs + EN->getNumVTs() + NumOperandBytes +
NumCoveredBytes;
return 4 + !CompressVTs + !CompressNodeInfo + EN->getNumVTs() +
NumOperandBytes + NumCoveredBytes;
}
case Matcher::CompleteMatch: {
const CompleteMatchMatcher *CM = cast<CompleteMatchMatcher>(N);