[llvm-mca] Missing data dependencies due to constant registers not being cached (#177990)

Commit 385f59f modified MCA InstrBuilder methods `populateReads` and
`populateWrites` to discard information about constant registers and
avoid creating non-existent dependency chains.

However, information about reads and writes is cached based on
instruction descriptions. As a result, if the same instruction is first
encountered with a constant register and later without one, the cached
entry will not contain information about the register used in its place,
resulting in missing data dependencies.

This patch moves the check for constant registers to `createInstruction`,
so that cached entries also account for constant registers; where
necessary, those registers are discarded later, when the instruction is
created.
This commit is contained in:
Giorgio Marletta 2026-02-17 18:38:12 +01:00 committed by GitHub
parent 1229c23723
commit a166de9d43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 148 additions and 56 deletions

View File

@ -338,10 +338,6 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
OptionalDefIdx = CurrentDef++;
continue;
}
if (MRI.isConstant(Op.getReg())) {
CurrentDef++;
continue;
}
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = i;
@ -420,8 +416,6 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
const MCOperand &Op = MCI.getOperand(OpIndex);
if (!Op.isReg())
continue;
if (MRI.isConstant(Op.getReg()))
continue;
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = OpIndex;
@ -457,8 +451,6 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
const MCOperand &Op = MCI.getOperand(OpIndex);
if (!Op.isReg())
continue;
if (MRI.isConstant(Op.getReg()))
continue;
ReadDescriptor &Read = ID.Reads[CurrentUse];
Read.OpIndex = OpIndex;
@ -476,8 +468,6 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
Read.OpIndex = ~I;
Read.UseIndex = NumExplicitUses + I;
Read.RegisterID = MCDesc.implicit_uses()[I];
if (MRI.isConstant(Read.RegisterID))
continue;
Read.SchedClassID = SchedClassID;
LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
<< ", UseIndex=" << Read.UseIndex << ", RegisterID="
@ -742,6 +732,9 @@ InstrBuilder::createInstruction(const MCInst &MCI,
// Skip non-register operands.
if (!Op.isReg())
continue;
// Skip constant register operands.
if (MRI.isConstant(Op.getReg()))
continue;
RegID = Op.getReg().id();
} else {
// Implicit read.

View File

@ -52,22 +52,22 @@ madd x0, x0, x0, x0
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 4
# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total Cycles: 13
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: 012
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER .. mul x0, x1, x2
# CHECK-NEXT: [0,1] D==eeeeER .. madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] D=eeeeE-R .. madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D=====eeeeER madd x0, x0, x0, x0
# CHECK: [0,0] DeeeeER . . mul x0, x1, x2
# CHECK-NEXT: [0,1] D=eeeeER . . madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] D==eeeeER . . madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D======eeeeER madd x0, x0, x0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -77,7 +77,7 @@ madd x0, x0, x0, x0
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mul x0, x1, x2
# CHECK-NEXT: 1. 1 3.0 3.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 1 2.0 2.0 1.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 1 6.0 0.0 0.0 madd x0, x0, x0, x0
# CHECK-NEXT: 1 3.0 1.5 0.3 <total>
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 madd x0, x0, x0, x0
# CHECK-NEXT: 1 3.3 0.3 0.0 <total>

View File

@ -312,25 +312,26 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total Cycles: 803
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 10
# CHECK-NEXT: uOps Per Cycle: 1.95
# CHECK-NEXT: IPC: 1.95
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Timeline view:
# CHECK-NEXT: Index 012345678
# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . mul x0, x0, x0
# CHECK-NEXT: [0,1] D=eeER . madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] DeeE-R . madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D==eeER . madd x0, x0, x0, x0
# CHECK-NEXT: [1,0] D====eeER mul x0, x0, x0
# CHECK-NEXT: [1,1] D==eeE--R madd x0, x1, x2, x0
# CHECK-NEXT: [1,2] D=eeE---R madd x0, x1, x2, x0
# CHECK-NEXT: [1,3] D===eeE-R madd x0, x0, x0, x0
# CHECK: [0,0] DeeER. . . . mul x0, x0, x0
# CHECK-NEXT: [0,1] D==eeER . . . madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] D====eeER . . . madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D======eeER . . madd x0, x0, x0, x0
# CHECK-NEXT: [1,0] D========eeER . . mul x0, x0, x0
# CHECK-NEXT: [1,1] D==========eeER. . madd x0, x1, x2, x0
# CHECK-NEXT: [1,2] D============eeER . madd x0, x1, x2, x0
# CHECK-NEXT: [1,3] D==============eeER madd x0, x0, x0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -339,11 +340,11 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mul x0, x0, x0
# CHECK-NEXT: 1. 2 2.5 2.5 1.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 2 1.5 1.5 2.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 2 3.5 0.0 0.5 madd x0, x0, x0, x0
# CHECK-NEXT: 2 2.6 1.1 0.9 <total>
# CHECK-NEXT: 0. 2 5.0 0.5 0.0 mul x0, x0, x0
# CHECK-NEXT: 1. 2 7.0 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 2 9.0 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 2 11.0 0.0 0.0 madd x0, x0, x0, x0
# CHECK-NEXT: 2 8.0 0.1 0.0 <total>
# CHECK: [1] Code Region - smaddl

View File

@ -312,25 +312,26 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total Cycles: 803
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 10
# CHECK-NEXT: uOps Per Cycle: 1.95
# CHECK-NEXT: IPC: 1.95
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Timeline view:
# CHECK-NEXT: Index 012345678
# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . mul x0, x0, x0
# CHECK-NEXT: [0,1] D=eeER . madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] DeeE-R . madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D==eeER . madd x0, x0, x0, x0
# CHECK-NEXT: [1,0] D====eeER mul x0, x0, x0
# CHECK-NEXT: [1,1] D==eeE--R madd x0, x1, x2, x0
# CHECK-NEXT: [1,2] D=eeE---R madd x0, x1, x2, x0
# CHECK-NEXT: [1,3] D===eeE-R madd x0, x0, x0, x0
# CHECK: [0,0] DeeER. . . . mul x0, x0, x0
# CHECK-NEXT: [0,1] D==eeER . . . madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] D====eeER . . . madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D======eeER . . madd x0, x0, x0, x0
# CHECK-NEXT: [1,0] D========eeER . . mul x0, x0, x0
# CHECK-NEXT: [1,1] D==========eeER. . madd x0, x1, x2, x0
# CHECK-NEXT: [1,2] D============eeER . madd x0, x1, x2, x0
# CHECK-NEXT: [1,3] D==============eeER madd x0, x0, x0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -339,11 +340,11 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mul x0, x0, x0
# CHECK-NEXT: 1. 2 2.5 2.5 1.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 2 1.5 1.5 2.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 2 3.5 0.0 0.5 madd x0, x0, x0, x0
# CHECK-NEXT: 2 2.6 1.1 0.9 <total>
# CHECK-NEXT: 0. 2 5.0 0.5 0.0 mul x0, x0, x0
# CHECK-NEXT: 1. 2 7.0 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 2 9.0 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 2 11.0 0.0 0.0 madd x0, x0, x0, x0
# CHECK-NEXT: 2 8.0 0.1 0.0 <total>
# CHECK: [1] Code Region - smaddl

View File

@ -0,0 +1,97 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s
# zero (x0) as input
sb zero, 0x0(a0)
lbu a2, -0x1(a1)
sb a2, 0x0(a1)
# zero (x0) as output
add zero, a3, a4
add a5, a3, a4
sb a5, 0x0(a1)
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 6
# CHECK-NEXT: Total Cycles: 10
# CHECK-NEXT: Total uOps: 6
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 * sb zero, 0(a0)
# CHECK-NEXT: 1 5 1.00 * lbu a2, -1(a1)
# CHECK-NEXT: 1 1 1.00 * sb a2, 0(a1)
# CHECK-NEXT: 1 1 0.50 add zero, a3, a4
# CHECK-NEXT: 1 1 0.50 add a5, a3, a4
# CHECK-NEXT: 1 1 1.00 * sb a5, 0(a1)
# CHECK: Resources:
# CHECK-NEXT: [0.0] - Andes45ALU
# CHECK-NEXT: [0.1] - Andes45ALU
# CHECK-NEXT: [1] - Andes45CSR
# CHECK-NEXT: [2] - Andes45FDIV
# CHECK-NEXT: [3] - Andes45FMAC
# CHECK-NEXT: [4] - Andes45FMISC
# CHECK-NEXT: [5] - Andes45FMV
# CHECK-NEXT: [6] - Andes45LSU
# CHECK-NEXT: [7] - Andes45MDU
# CHECK-NEXT: [8] - Andes45VALU
# CHECK-NEXT: [9] - Andes45VDIV
# CHECK-NEXT: [10] - Andes45VFDIV
# CHECK-NEXT: [11] - Andes45VFMIS
# CHECK-NEXT: [12] - Andes45VLSU
# CHECK-NEXT: [13] - Andes45VMAC
# CHECK-NEXT: [14] - Andes45VMASK
# CHECK-NEXT: [15] - Andes45VPERMUT
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
# CHECK-NEXT: 1.00 1.00 - - - - - 4.00 - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb zero, 0(a0)
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lbu a2, -1(a1)
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb a2, 0(a1)
# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add zero, a3, a4
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a5, a3, a4
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb a5, 0(a1)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DE . . sb zero, 0(a0)
# CHECK-NEXT: [0,1] .DeeeeE . lbu a2, -1(a1)
# CHECK-NEXT: [0,2] . .DE . sb a2, 0(a1)
# CHECK-NEXT: [0,3] . .DE . add zero, a3, a4
# CHECK-NEXT: [0,4] . . DE. add a5, a3, a4
# CHECK-NEXT: [0,5] . . DE sb a5, 0(a1)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 0.0 0.0 0.0 sb zero, 0(a0)
# CHECK-NEXT: 1. 1 0.0 0.0 0.0 lbu a2, -1(a1)
# CHECK-NEXT: 2. 1 0.0 0.0 0.0 sb a2, 0(a1)
# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add zero, a3, a4
# CHECK-NEXT: 4. 1 0.0 0.0 0.0 add a5, a3, a4
# CHECK-NEXT: 5. 1 0.0 0.0 0.0 sb a5, 0(a1)
# CHECK-NEXT: 1 0.0 0.0 0.0 <total>