[SelectionDAG] Add space-optimized forms of OPC_EmitNode/OPC_MorphNodeTo (#73502)

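If at most one flag is set in EmitNodeInfo (none or OPFL_Chain for
OPC_EmitNode; none, OPFL_Chain, OPFL_GlueInput or OPFL_GlueOutput for
OPC_MorphNodeTo), then we can encode it implicitly in the opcode to save
one byte per node. These forms are only used together with the existing
compressed encodings for 0, 1 or 2 result VTs.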

Overall this reduces the llc binary size with all in-tree targets by
about 168K.
This commit is contained in:
Wang Pengcheng 2023-12-12 17:45:32 +08:00 committed by GitHub
parent e3f4fa9834
commit 0d5f1cc4d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 133 additions and 26 deletions

View File

@ -244,11 +244,31 @@ public:
OPC_EmitNode0, OPC_EmitNode0,
OPC_EmitNode1, OPC_EmitNode1,
OPC_EmitNode2, OPC_EmitNode2,
// Space-optimized forms that implicitly encode EmitNodeInfo.
OPC_EmitNode0None,
OPC_EmitNode1None,
OPC_EmitNode2None,
OPC_EmitNode0Chain,
OPC_EmitNode1Chain,
OPC_EmitNode2Chain,
OPC_MorphNodeTo, OPC_MorphNodeTo,
// Space-optimized forms that implicitly encode number of result VTs. // Space-optimized forms that implicitly encode number of result VTs.
OPC_MorphNodeTo0, OPC_MorphNodeTo0,
OPC_MorphNodeTo1, OPC_MorphNodeTo1,
OPC_MorphNodeTo2, OPC_MorphNodeTo2,
// Space-optimized forms that implicitly encode EmitNodeInfo.
OPC_MorphNodeTo0None,
OPC_MorphNodeTo1None,
OPC_MorphNodeTo2None,
OPC_MorphNodeTo0Chain,
OPC_MorphNodeTo1Chain,
OPC_MorphNodeTo2Chain,
OPC_MorphNodeTo0GlueInput,
OPC_MorphNodeTo1GlueInput,
OPC_MorphNodeTo2GlueInput,
OPC_MorphNodeTo0GlueOutput,
OPC_MorphNodeTo1GlueOutput,
OPC_MorphNodeTo2GlueOutput,
OPC_CompleteMatch, OPC_CompleteMatch,
// Contains offset in table for pattern being selected // Contains offset in table for pattern being selected
OPC_Coverage OPC_Coverage

View File

@ -3757,20 +3757,77 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue; continue;
} }
case OPC_EmitNode: case OPC_MorphNodeTo: case OPC_EmitNode:
case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2: case OPC_EmitNode0:
case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: { case OPC_EmitNode1:
case OPC_EmitNode2:
case OPC_EmitNode0None:
case OPC_EmitNode1None:
case OPC_EmitNode2None:
case OPC_EmitNode0Chain:
case OPC_EmitNode1Chain:
case OPC_EmitNode2Chain:
case OPC_MorphNodeTo:
case OPC_MorphNodeTo0:
case OPC_MorphNodeTo1:
case OPC_MorphNodeTo2:
case OPC_MorphNodeTo0None:
case OPC_MorphNodeTo1None:
case OPC_MorphNodeTo2None:
case OPC_MorphNodeTo0Chain:
case OPC_MorphNodeTo1Chain:
case OPC_MorphNodeTo2Chain:
case OPC_MorphNodeTo0GlueInput:
case OPC_MorphNodeTo1GlueInput:
case OPC_MorphNodeTo2GlueInput:
case OPC_MorphNodeTo0GlueOutput:
case OPC_MorphNodeTo1GlueOutput:
case OPC_MorphNodeTo2GlueOutput: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++]; uint16_t TargetOpc = MatcherTable[MatcherIndex++];
TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8; TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; unsigned EmitNodeInfo;
if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2Chain) {
if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
EmitNodeInfo = OPFL_Chain;
else
EmitNodeInfo = OPFL_None;
} else if (Opcode >= OPC_MorphNodeTo0None &&
Opcode <= OPC_MorphNodeTo2GlueOutput) {
if (Opcode >= OPC_MorphNodeTo0Chain && Opcode <= OPC_MorphNodeTo2Chain)
EmitNodeInfo = OPFL_Chain;
else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
Opcode <= OPC_MorphNodeTo2GlueInput)
EmitNodeInfo = OPFL_GlueInput;
else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
Opcode <= OPC_MorphNodeTo2GlueOutput)
EmitNodeInfo = OPFL_GlueOutput;
else
EmitNodeInfo = OPFL_None;
} else
EmitNodeInfo = MatcherTable[MatcherIndex++];
// Get the result VT list. // Get the result VT list.
unsigned NumVTs; unsigned NumVTs;
// If this is one of the compressed forms, get the number of VTs based // If this is one of the compressed forms, get the number of VTs based
// on the Opcode. Otherwise read the next byte from the table. // on the Opcode. Otherwise read the next byte from the table.
if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2) if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
NumVTs = Opcode - OPC_MorphNodeTo0; NumVTs = Opcode - OPC_MorphNodeTo0;
else if (Opcode >= OPC_MorphNodeTo0None && Opcode <= OPC_MorphNodeTo2None)
NumVTs = Opcode - OPC_MorphNodeTo0None;
else if (Opcode >= OPC_MorphNodeTo0Chain &&
Opcode <= OPC_MorphNodeTo2Chain)
NumVTs = Opcode - OPC_MorphNodeTo0Chain;
else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
Opcode <= OPC_MorphNodeTo2GlueInput)
NumVTs = Opcode - OPC_MorphNodeTo0GlueInput;
else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
Opcode <= OPC_MorphNodeTo2GlueOutput)
NumVTs = Opcode - OPC_MorphNodeTo0GlueOutput;
else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2) else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
NumVTs = Opcode - OPC_EmitNode0; NumVTs = Opcode - OPC_EmitNode0;
else if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2None)
NumVTs = Opcode - OPC_EmitNode0None;
else if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
NumVTs = Opcode - OPC_EmitNode0Chain;
else else
NumVTs = MatcherTable[MatcherIndex++]; NumVTs = MatcherTable[MatcherIndex++];
SmallVector<EVT, 4> VTs; SmallVector<EVT, 4> VTs;
@ -3843,8 +3900,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Create the node. // Create the node.
MachineSDNode *Res = nullptr; MachineSDNode *Res = nullptr;
bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || bool IsMorphNodeTo =
(Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2); Opcode == OPC_MorphNodeTo ||
(Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2GlueOutput);
if (!IsMorphNodeTo) { if (!IsMorphNodeTo) {
// If this is a normal EmitNode command, just create the new node and // If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list. // add the results to the RecordedNodes list.

View File

@ -77,20 +77,20 @@ def MulIRRPat : Pat<(mul i32:$x, i32:$y), (MulIRR Reg:$x, Reg:$y)>;
// ADD-NEXT: OPC_RecordChild0 // ADD-NEXT: OPC_RecordChild0
// ADD-NEXT: OPC_RecordChild1 // ADD-NEXT: OPC_RecordChild1
// ADD-NEXT: OPC_EmitInteger32, 0 // ADD-NEXT: OPC_EmitInteger32, 0
// ADD-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::AddRRI) // ADD-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::AddRRI)
// ADDINT: SwitchOpcode{{.*}}TARGET_VAL(ISD::INTRINSIC_WO_CHAIN) // ADDINT: SwitchOpcode{{.*}}TARGET_VAL(ISD::INTRINSIC_WO_CHAIN)
// ADDINT-NEXT: OPC_CheckChild0Integer // ADDINT-NEXT: OPC_CheckChild0Integer
// ADDINT-NEXT: OPC_RecordChild1 // ADDINT-NEXT: OPC_RecordChild1
// ADDINT-NEXT: OPC_RecordChild2 // ADDINT-NEXT: OPC_RecordChild2
// ADDINT-NEXT: OPC_EmitInteger32, 2 // ADDINT-NEXT: OPC_EmitInteger32, 2
// ADDINT-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::AddRRI) // ADDINT-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::AddRRI)
// SUB: SwitchOpcode{{.*}}TARGET_VAL(ISD::SUB) // SUB: SwitchOpcode{{.*}}TARGET_VAL(ISD::SUB)
// SUB-NEXT: OPC_RecordChild0 // SUB-NEXT: OPC_RecordChild0
// SUB-NEXT: OPC_RecordChild1 // SUB-NEXT: OPC_RecordChild1
// SUB-NEXT: OPC_EmitInteger32, 0 // SUB-NEXT: OPC_EmitInteger32, 0
// SUB-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::SubRRI) // SUB-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::SubRRI)
// MULINT: SwitchOpcode{{.*}}TARGET_VAL(ISD::INTRINSIC_W_CHAIN) // MULINT: SwitchOpcode{{.*}}TARGET_VAL(ISD::INTRINSIC_W_CHAIN)
// MULINT-NEXT: OPC_RecordNode // MULINT-NEXT: OPC_RecordNode
@ -99,10 +99,10 @@ def MulIRRPat : Pat<(mul i32:$x, i32:$y), (MulIRR Reg:$x, Reg:$y)>;
// MULINT-NEXT: OPC_RecordChild3 // MULINT-NEXT: OPC_RecordChild3
// MULINT-NEXT: OPC_RecordChild4 // MULINT-NEXT: OPC_RecordChild4
// MULINT-NEXT: OPC_EmitMergeInputChains // MULINT-NEXT: OPC_EmitMergeInputChains
// MULINT-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::MulRRI) // MULINT-NEXT: OPC_MorphNodeTo1Chain, TARGET_VAL(::MulRRI)
// MUL: SwitchOpcode{{.*}}TARGET_VAL(ISD::MUL) // MUL: SwitchOpcode{{.*}}TARGET_VAL(ISD::MUL)
// MUL-NEXT: OPC_EmitInteger32, 0 // MUL-NEXT: OPC_EmitInteger32, 0
// MUL-NEXT: OPC_RecordChild0 // MUL-NEXT: OPC_RecordChild0
// MUL-NEXT: OPC_RecordChild1 // MUL-NEXT: OPC_RecordChild1
// MUL-NEXT: OPC_MorphNodeTo1, TARGET_VAL(::MulRRI) // MUL-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(::MulRRI)

View File

@ -23,17 +23,17 @@ def GPRAbove127 : RegisterClass<"TestTarget", [i32], 32,
// CHECK: OPC_CheckOpcode, TARGET_VAL(ISD::ADD), // CHECK: OPC_CheckOpcode, TARGET_VAL(ISD::ADD),
// CHECK-NEXT: OPC_RecordChild0, // #0 = $src // CHECK-NEXT: OPC_RecordChild0, // #0 = $src
// CHECK-NEXT: OPC_Scope, 13, /*->19*/ // 2 children in Scope // CHECK-NEXT: OPC_Scope, 12, /*->18*/ // 2 children in Scope
// CHECK-NEXT: OPC_CheckChild1Integer, 0, // CHECK-NEXT: OPC_CheckChild1Integer, 0,
// CHECK-NEXT: OPC_EmitInteger32, 0|128,2/*256*/, // CHECK-NEXT: OPC_EmitInteger32, 0|128,2/*256*/,
// CHECK-NEXT: OPC_MorphNodeTo1, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS), 0, // CHECK-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS),
// CHECK-NEXT: MVT::i32, 2/*#Ops*/, 1, 0, // CHECK-NEXT: MVT::i32, 2/*#Ops*/, 1, 0,
def : Pat<(i32 (add i32:$src, (i32 0))), def : Pat<(i32 (add i32:$src, (i32 0))),
(COPY_TO_REGCLASS GPRAbove127, GPR0:$src)>; (COPY_TO_REGCLASS GPRAbove127, GPR0:$src)>;
// CHECK: OPC_CheckChild1Integer, 2, // CHECK: OPC_CheckChild1Integer, 2,
// CHECK-NEXT: OPC_EmitStringInteger32, TestNamespace::GPR127RegClassID, // CHECK-NEXT: OPC_EmitStringInteger32, TestNamespace::GPR127RegClassID,
// CHECK-NEXT: OPC_MorphNodeTo1, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS), 0, // CHECK-NEXT: OPC_MorphNodeTo1None, TARGET_VAL(TargetOpcode::COPY_TO_REGCLASS),
// CHECK-NEXT: MVT::i32, 2/*#Ops*/, 1, 0, // CHECK-NEXT: MVT::i32, 2/*#Ops*/, 1, 0,
def : Pat<(i32 (add i32:$src, (i32 1))), def : Pat<(i32 (add i32:$src, (i32 1))),
(COPY_TO_REGCLASS GPR127, GPR0:$src)>; (COPY_TO_REGCLASS GPR127, GPR0:$src)>;

View File

@ -12,7 +12,7 @@ def REG : Register<"REG">;
def GPR : RegisterClass<"TestTarget", [i32], 32, (add REG)>; def GPR : RegisterClass<"TestTarget", [i32], 32, (add REG)>;
// CHECK-LABEL: OPC_CheckOpcode, TARGET_VAL(ISD::UDIVREM) // CHECK-LABEL: OPC_CheckOpcode, TARGET_VAL(ISD::UDIVREM)
// CHECK: OPC_EmitNode2, TARGET_VAL(::INSTR) // CHECK: OPC_EmitNode2None, TARGET_VAL(::INSTR)
// CHECK: Results = #2 #3 // CHECK: Results = #2 #3
// CHECK: OPC_CompleteMatch, 2, 3, 2 // CHECK: OPC_CompleteMatch, 2, 3, 2
def INSTR : Instruction { def INSTR : Instruction {

View File

@ -824,21 +824,50 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
} }
} }
const EmitNodeMatcherCommon *EN = cast<EmitNodeMatcherCommon>(N); const EmitNodeMatcherCommon *EN = cast<EmitNodeMatcherCommon>(N);
OS << (isa<EmitNodeMatcher>(EN) ? "OPC_EmitNode" : "OPC_MorphNodeTo"); bool IsEmitNode = isa<EmitNodeMatcher>(EN);
OS << (IsEmitNode ? "OPC_EmitNode" : "OPC_MorphNodeTo");
bool CompressVTs = EN->getNumVTs() < 3; bool CompressVTs = EN->getNumVTs() < 3;
if (CompressVTs) bool CompressNodeInfo = false;
if (CompressVTs) {
OS << EN->getNumVTs(); OS << EN->getNumVTs();
if (!EN->hasChain() && !EN->hasInGlue() && !EN->hasOutGlue() &&
!EN->hasMemRefs() && EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "None";
} else if (EN->hasChain() && !EN->hasInGlue() && !EN->hasOutGlue() &&
!EN->hasMemRefs() && EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "Chain";
} else if (!IsEmitNode && !EN->hasChain() && EN->hasInGlue() &&
!EN->hasOutGlue() && !EN->hasMemRefs() &&
EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "GlueInput";
} else if (!IsEmitNode && !EN->hasChain() && !EN->hasInGlue() &&
EN->hasOutGlue() && !EN->hasMemRefs() &&
EN->getNumFixedArityOperands() == -1) {
CompressNodeInfo = true;
OS << "GlueOutput";
}
}
const CodeGenInstruction &CGI = EN->getInstruction(); const CodeGenInstruction &CGI = EN->getInstruction();
OS << ", TARGET_VAL(" << CGI.Namespace << "::" << CGI.TheDef->getName() OS << ", TARGET_VAL(" << CGI.Namespace << "::" << CGI.TheDef->getName()
<< "), 0"; << ")";
if (EN->hasChain()) OS << "|OPFL_Chain"; if (!CompressNodeInfo) {
if (EN->hasInGlue()) OS << "|OPFL_GlueInput"; OS << ", 0";
if (EN->hasOutGlue()) OS << "|OPFL_GlueOutput"; if (EN->hasChain())
if (EN->hasMemRefs()) OS << "|OPFL_MemRefs"; OS << "|OPFL_Chain";
if (EN->getNumFixedArityOperands() != -1) if (EN->hasInGlue())
OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands(); OS << "|OPFL_GlueInput";
if (EN->hasOutGlue())
OS << "|OPFL_GlueOutput";
if (EN->hasMemRefs())
OS << "|OPFL_MemRefs";
if (EN->getNumFixedArityOperands() != -1)
OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands();
}
OS << ",\n"; OS << ",\n";
OS.indent(FullIndexWidth + Indent+4); OS.indent(FullIndexWidth + Indent+4);
@ -881,8 +910,8 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
} else } else
OS << '\n'; OS << '\n';
return 5 + !CompressVTs + EN->getNumVTs() + NumOperandBytes + return 4 + !CompressVTs + !CompressNodeInfo + EN->getNumVTs() +
NumCoveredBytes; NumOperandBytes + NumCoveredBytes;
} }
case Matcher::CompleteMatch: { case Matcher::CompleteMatch: {
const CompleteMatchMatcher *CM = cast<CompleteMatchMatcher>(N); const CompleteMatchMatcher *CM = cast<CompleteMatchMatcher>(N);