From a166de9d43be7cefd0ed28471f7766a2b4c7fcc9 Mon Sep 17 00:00:00 2001 From: Giorgio Marletta <57546456+giorgio-marletta@users.noreply.github.com> Date: Tue, 17 Feb 2026 18:38:12 +0100 Subject: [PATCH] [llvm-mca] Missing data dependencies due to constant registers not being cached (#177990) Commit 385f59f modified the MCA InstrBuilder methods `populateReads` and `populateWrites` to discard information about constant registers, in order to avoid creating non-existent dependency chains. However, information about reads and writes is cached based on instruction descriptions. As a result, if the same instruction is encountered multiple times, first with a constant register and later without one, the cached entry will not contain information about that specific register, resulting in missing data dependencies. This patch moves the check for constant registers to `createInstruction`, so that cached entries also take constant registers into account; where necessary, constant-register operands are then discarded later, when the instruction is created. 
--- llvm/lib/MCA/InstrBuilder.cpp | 13 +-- .../AArch64/HiSilicon/tsv110-forwarding.s | 24 ++--- .../llvm-mca/AArch64/Neoverse/V3-forwarding.s | 35 +++---- .../AArch64/Neoverse/V3AE-forwarding.s | 35 +++---- .../tools/llvm-mca/RISCV/Andes45/zero-reg.s | 97 +++++++++++++++++++ 5 files changed, 148 insertions(+), 56 deletions(-) create mode 100644 llvm/test/tools/llvm-mca/RISCV/Andes45/zero-reg.s diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 32b8cf5c38b4..72184ccd84ca 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -338,10 +338,6 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, OptionalDefIdx = CurrentDef++; continue; } - if (MRI.isConstant(Op.getReg())) { - CurrentDef++; - continue; - } WriteDescriptor &Write = ID.Writes[CurrentDef]; Write.OpIndex = i; @@ -420,8 +416,6 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, const MCOperand &Op = MCI.getOperand(OpIndex); if (!Op.isReg()) continue; - if (MRI.isConstant(Op.getReg())) - continue; WriteDescriptor &Write = ID.Writes[CurrentDef]; Write.OpIndex = OpIndex; @@ -457,8 +451,6 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, const MCOperand &Op = MCI.getOperand(OpIndex); if (!Op.isReg()) continue; - if (MRI.isConstant(Op.getReg())) - continue; ReadDescriptor &Read = ID.Reads[CurrentUse]; Read.OpIndex = OpIndex; @@ -476,8 +468,6 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, Read.OpIndex = ~I; Read.UseIndex = NumExplicitUses + I; Read.RegisterID = MCDesc.implicit_uses()[I]; - if (MRI.isConstant(Read.RegisterID)) - continue; Read.SchedClassID = SchedClassID; LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex << ", UseIndex=" << Read.UseIndex << ", RegisterID=" @@ -742,6 +732,9 @@ InstrBuilder::createInstruction(const MCInst &MCI, // Skip non-register operands. if (!Op.isReg()) continue; + // Skip constant register operands. 
+ if (MRI.isConstant(Op.getReg())) + continue; RegID = Op.getReg().id(); } else { // Implicit read. diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s index b29697ea7972..207822b61839 100644 --- a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s +++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s @@ -52,22 +52,22 @@ madd x0, x0, x0, x0 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 4 -# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Total Cycles: 13 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.33 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 0.31 +# CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER .. mul x0, x1, x2 -# CHECK-NEXT: [0,1] D==eeeeER .. madd x0, x1, x2, x0 -# CHECK-NEXT: [0,2] D=eeeeE-R .. madd x0, x1, x2, x0 -# CHECK-NEXT: [0,3] D=====eeeeER madd x0, x0, x0, x0 +# CHECK: [0,0] DeeeeER . . mul x0, x1, x2 +# CHECK-NEXT: [0,1] D=eeeeER . . madd x0, x1, x2, x0 +# CHECK-NEXT: [0,2] D==eeeeER . . madd x0, x1, x2, x0 +# CHECK-NEXT: [0,3] D======eeeeER madd x0, x0, x0, x0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -77,7 +77,7 @@ madd x0, x0, x0, x0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 mul x0, x1, x2 -# CHECK-NEXT: 1. 1 3.0 3.0 0.0 madd x0, x1, x2, x0 -# CHECK-NEXT: 2. 1 2.0 2.0 1.0 madd x0, x1, x2, x0 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 madd x0, x0, x0, x0 -# CHECK-NEXT: 1 3.0 1.5 0.3 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 madd x0, x1, x2, x0 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 madd x0, x1, x2, x0 +# CHECK-NEXT: 3. 
1 7.0 0.0 0.0 madd x0, x0, x0, x0 +# CHECK-NEXT: 1 3.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s index 1214b15ea8b9..e14e436a5067 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s @@ -312,25 +312,26 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 205 +# CHECK-NEXT: Total Cycles: 803 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 1.95 -# CHECK-NEXT: IPC: 1.95 +# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . mul x0, x0, x0 -# CHECK-NEXT: [0,1] D=eeER . madd x0, x1, x2, x0 -# CHECK-NEXT: [0,2] DeeE-R . madd x0, x1, x2, x0 -# CHECK-NEXT: [0,3] D==eeER . madd x0, x0, x0, x0 -# CHECK-NEXT: [1,0] D====eeER mul x0, x0, x0 -# CHECK-NEXT: [1,1] D==eeE--R madd x0, x1, x2, x0 -# CHECK-NEXT: [1,2] D=eeE---R madd x0, x1, x2, x0 -# CHECK-NEXT: [1,3] D===eeE-R madd x0, x0, x0, x0 +# CHECK: [0,0] DeeER. . . . mul x0, x0, x0 +# CHECK-NEXT: [0,1] D==eeER . . . madd x0, x1, x2, x0 +# CHECK-NEXT: [0,2] D====eeER . . . madd x0, x1, x2, x0 +# CHECK-NEXT: [0,3] D======eeER . . madd x0, x0, x0, x0 +# CHECK-NEXT: [1,0] D========eeER . . mul x0, x0, x0 +# CHECK-NEXT: [1,1] D==========eeER. . madd x0, x1, x2, x0 +# CHECK-NEXT: [1,2] D============eeER . madd x0, x1, x2, x0 +# CHECK-NEXT: [1,3] D==============eeER madd x0, x0, x0, x0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -339,11 +340,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mul x0, x0, x0 -# CHECK-NEXT: 1. 
2 2.5 2.5 1.0 madd x0, x1, x2, x0 -# CHECK-NEXT: 2. 2 1.5 1.5 2.0 madd x0, x1, x2, x0 -# CHECK-NEXT: 3. 2 3.5 0.0 0.5 madd x0, x0, x0, x0 -# CHECK-NEXT: 2 2.6 1.1 0.9 +# CHECK-NEXT: 0. 2 5.0 0.5 0.0 mul x0, x0, x0 +# CHECK-NEXT: 1. 2 7.0 0.0 0.0 madd x0, x1, x2, x0 +# CHECK-NEXT: 2. 2 9.0 0.0 0.0 madd x0, x1, x2, x0 +# CHECK-NEXT: 3. 2 11.0 0.0 0.0 madd x0, x0, x0, x0 +# CHECK-NEXT: 2 8.0 0.1 0.0 # CHECK: [1] Code Region - smaddl diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s index 1e97750215d5..9f4fc4701f28 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s @@ -312,25 +312,26 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 205 +# CHECK-NEXT: Total Cycles: 803 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 1.95 -# CHECK-NEXT: IPC: 1.95 +# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . mul x0, x0, x0 -# CHECK-NEXT: [0,1] D=eeER . madd x0, x1, x2, x0 -# CHECK-NEXT: [0,2] DeeE-R . madd x0, x1, x2, x0 -# CHECK-NEXT: [0,3] D==eeER . madd x0, x0, x0, x0 -# CHECK-NEXT: [1,0] D====eeER mul x0, x0, x0 -# CHECK-NEXT: [1,1] D==eeE--R madd x0, x1, x2, x0 -# CHECK-NEXT: [1,2] D=eeE---R madd x0, x1, x2, x0 -# CHECK-NEXT: [1,3] D===eeE-R madd x0, x0, x0, x0 +# CHECK: [0,0] DeeER. . . . mul x0, x0, x0 +# CHECK-NEXT: [0,1] D==eeER . . . madd x0, x1, x2, x0 +# CHECK-NEXT: [0,2] D====eeER . . . madd x0, x1, x2, x0 +# CHECK-NEXT: [0,3] D======eeER . . madd x0, x0, x0, x0 +# CHECK-NEXT: [1,0] D========eeER . . mul x0, x0, x0 +# CHECK-NEXT: [1,1] D==========eeER. . madd x0, x1, x2, x0 +# CHECK-NEXT: [1,2] D============eeER . 
madd x0, x1, x2, x0 +# CHECK-NEXT: [1,3] D==============eeER madd x0, x0, x0, x0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -339,11 +340,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mul x0, x0, x0 -# CHECK-NEXT: 1. 2 2.5 2.5 1.0 madd x0, x1, x2, x0 -# CHECK-NEXT: 2. 2 1.5 1.5 2.0 madd x0, x1, x2, x0 -# CHECK-NEXT: 3. 2 3.5 0.0 0.5 madd x0, x0, x0, x0 -# CHECK-NEXT: 2 2.6 1.1 0.9 +# CHECK-NEXT: 0. 2 5.0 0.5 0.0 mul x0, x0, x0 +# CHECK-NEXT: 1. 2 7.0 0.0 0.0 madd x0, x1, x2, x0 +# CHECK-NEXT: 2. 2 9.0 0.0 0.0 madd x0, x1, x2, x0 +# CHECK-NEXT: 3. 2 11.0 0.0 0.0 madd x0, x0, x0, x0 +# CHECK-NEXT: 2 8.0 0.1 0.0 # CHECK: [1] Code Region - smaddl diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/zero-reg.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/zero-reg.s new file mode 100644 index 000000000000..249fe8fe893a --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/zero-reg.s @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s + +# zero (x0) as input + +sb zero, 0x0(a0) +lbu a2, -0x1(a1) +sb a2, 0x0(a1) + +# zero (x0) as output + +add zero, a3, a4 +add a5, a3, a4 +sb a5, 0x0(a1) + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 6 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Total uOps: 6 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.60 +# CHECK-NEXT: IPC: 0.60 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 * sb zero, 0(a0) +# CHECK-NEXT: 1 5 1.00 * lbu a2, -1(a1) +# CHECK-NEXT: 1 
1 1.00 * sb a2, 0(a1) +# CHECK-NEXT: 1 1 0.50 add zero, a3, a4 +# CHECK-NEXT: 1 1 0.50 add a5, a3, a4 +# CHECK-NEXT: 1 1 1.00 * sb a5, 0(a1) + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Andes45ALU +# CHECK-NEXT: [0.1] - Andes45ALU +# CHECK-NEXT: [1] - Andes45CSR +# CHECK-NEXT: [2] - Andes45FDIV +# CHECK-NEXT: [3] - Andes45FMAC +# CHECK-NEXT: [4] - Andes45FMISC +# CHECK-NEXT: [5] - Andes45FMV +# CHECK-NEXT: [6] - Andes45LSU +# CHECK-NEXT: [7] - Andes45MDU +# CHECK-NEXT: [8] - Andes45VALU +# CHECK-NEXT: [9] - Andes45VDIV +# CHECK-NEXT: [10] - Andes45VFDIV +# CHECK-NEXT: [11] - Andes45VFMIS +# CHECK-NEXT: [12] - Andes45VLSU +# CHECK-NEXT: [13] - Andes45VMAC +# CHECK-NEXT: [14] - Andes45VMASK +# CHECK-NEXT: [15] - Andes45VPERMUT + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] +# CHECK-NEXT: 1.00 1.00 - - - - - 4.00 - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb zero, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lbu a2, -1(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb a2, 0(a1) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add zero, a3, a4 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a5, a3, a4 +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb a5, 0(a1) + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DE . . sb zero, 0(a0) +# CHECK-NEXT: [0,1] .DeeeeE . lbu a2, -1(a1) +# CHECK-NEXT: [0,2] . .DE . sb a2, 0(a1) +# CHECK-NEXT: [0,3] . .DE . add zero, a3, a4 +# CHECK-NEXT: [0,4] . . DE. add a5, a3, a4 +# CHECK-NEXT: [0,5] . . 
DE sb a5, 0(a1) + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 sb zero, 0(a0) +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 lbu a2, -1(a1) +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 sb a2, 0(a1) +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add zero, a3, a4 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 add a5, a3, a4 +# CHECK-NEXT: 5. 1 0.0 0.0 0.0 sb a5, 0(a1) +# CHECK-NEXT: 1 0.0 0.0 0.0