diff --git a/llvm/include/llvm/MCA/Stages/InstructionTables.h b/llvm/include/llvm/MCA/Stages/InstructionTables.h
index 7a96e82dd995..8db2a6b0f14d 100644
--- a/llvm/include/llvm/MCA/Stages/InstructionTables.h
+++ b/llvm/include/llvm/MCA/Stages/InstructionTables.h
@@ -22,6 +22,7 @@
 #include "llvm/MCA/Stages/Stage.h"
 #include "llvm/MCA/Support.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 
 namespace llvm {
 namespace mca {
@@ -35,6 +36,9 @@ public:
   InstructionTables(const MCSchedModel &Model)
       : SM(Model), Masks(Model.getNumProcResourceKinds()) {
     computeProcResourceMasks(Model, Masks);
+    // DEBUG_WITH_TYPE keeps this header from leaking a DEBUG_TYPE definition
+    // into every file that includes it.
+    DEBUG_WITH_TYPE("llvm-mca", dumpProcResourceMasks(Model, Masks));
   }
 
   bool hasWorkToComplete() const override { return false; }
diff --git a/llvm/include/llvm/MCA/Support.h b/llvm/include/llvm/MCA/Support.h
index ce2ac9b4b6cd..6875787833d9 100644
--- a/llvm/include/llvm/MCA/Support.h
+++ b/llvm/include/llvm/MCA/Support.h
@@ -96,6 +96,13 @@ public:
 LLVM_ABI void computeProcResourceMasks(const MCSchedModel &SM,
                                        MutableArrayRef Masks);
 
+#ifndef NDEBUG
+// Prints the index, mask and name of every processor resource kind in SM to
+// dbgs(). Only declared when NDEBUG is not defined.
+LLVM_ABI void dumpProcResourceMasks(const MCSchedModel &SM,
+                                    ArrayRef Masks);
+#endif
+
 // Returns the index of the highest bit set. For resource masks, the position of
 // the highest bit set can be used to construct a resource mask identifier.
 inline unsigned getResourceStateIndex(uint64_t Mask) {
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 0429f7b6970d..7671348ff29d 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -135,6 +135,24 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
     Strategies[Index] = getStrategyFor(*Resources[Index]);
   }
 
+  // Print static resource information in debug mode.
+  LLVM_DEBUG({
+    dbgs() << "\nProcessor resources:\n";
+    // Print InvalidUnit first to be consistent with the scheduling model
+    // indexing schema.
+    const MCProcResourceDesc &InvalidUnit = *SM.getProcResource(0);
+    dbgs() << "[ 0] - " << format_hex(ProcResID2Mask[0], 16) << " - "
+           << InvalidUnit.Name << "\n";
+    for (unsigned I = 0, E = Resources.size(); I < E; ++I) {
+      const ResourceState &RS = *Resources[I];
+      const unsigned ProcResID = RS.getProcResourceID();
+      const MCProcResourceDesc &Desc = *SM.getProcResource(ProcResID);
+      dbgs() << '[' << format_decimal(ProcResID, 2) << "] - "
+             << format_hex(RS.getResourceMask(), 16) << " - " << Desc.Name
+             << " (BufferSize=" << RS.getBufferSize() << ")\n";
+    }
+  });
+
   for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
     uint64_t Mask = ProcResID2Mask[I];
     unsigned Index = getResourceStateIndex(Mask);
diff --git a/llvm/lib/MCA/Support.cpp b/llvm/lib/MCA/Support.cpp
index 1f1f2ab8d2c3..45459f7a3a72 100644
--- a/llvm/lib/MCA/Support.cpp
+++ b/llvm/lib/MCA/Support.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/MCA/Support.h"
 #include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Debug.h"
 #include
 
 namespace llvm {
@@ -67,17 +68,19 @@ void computeProcResourceMasks(const MCSchedModel &SM,
     }
     ProcResourceID++;
   }
-
-  LLVM_DEBUG({
-    dbgs() << "\nProcessor resource masks:\n";
-    for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
-      const MCProcResourceDesc &Desc = *SM.getProcResource(I);
-      dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
-             << format_hex(Masks[I], 16) << " - " << Desc.Name << '\n';
-    }
-  });
 }
 
+#ifndef NDEBUG
+void dumpProcResourceMasks(const MCSchedModel &SM, ArrayRef Masks) {
+  dbgs() << "\nProcessor resource masks:\n";
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
+           << format_hex(Masks[I], 16) << " - " << Desc.Name << '\n';
+  }
+}
+#endif
+
 double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
                                unsigned NumMicroOps,
                                ArrayRef ProcResourceUsage) {
diff --git a/llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test b/llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test
new file mode 100644
index 000000000000..fa97d8fa9cd6
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test
@@ -0,0 +1,55 @@
+# REQUIRES: asserts
+# REQUIRES: aarch64-registered-target
+
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=/dev/null -analysis-numpoints=1 --debug-only=exegesis-sched-class-resolution 2>&1 | FileCheck %s
+
+## Do not print detailed processor resources information without simulation
+# CHECK-NOT: Processor resources:
+
+## Print mask-only information without simulation
+# CHECK-COUNT-1: Processor resource masks:
+# CHECK-NEXT: [ 0] - 0x00000000000000 - InvalidUnit
+# CHECK-NEXT: [ 1] - 0x00000000000001 - V2UnitB
+# CHECK-NEXT: [ 2] - 0x00000000000002 - V2UnitD
+# CHECK-NEXT: [ 3] - 0x000000000081e0 - V2UnitF
+# CHECK-NEXT: [ 4] - 0x00000000000004 - V2UnitFlg
+# CHECK-NEXT: [ 5] - 0x000000000107e0 - V2UnitI
+# CHECK-NEXT: [ 6] - 0x00000000020018 - V2UnitL
+# CHECK-NEXT: [ 7] - 0x00000000000008 - V2UnitL2
+# CHECK-NEXT: [ 8] - 0x00000000000010 - V2UnitL01
+# CHECK-NEXT: [ 9] - 0x00000000040060 - V2UnitM
+# CHECK-NEXT: [10] - 0x00000000000020 - V2UnitM0
+# CHECK-NEXT: [11] - 0x00000000000040 - V2UnitM1
+# CHECK-NEXT: [12] - 0x00000000080180 - V2UnitR
+# CHECK-NEXT: [13] - 0x00000000100780 - V2UnitS
+# CHECK-NEXT: [14] - 0x00000000000080 - V2UnitS0
+# CHECK-NEXT: [15] - 0x00000000000100 - V2UnitS1
+# CHECK-NEXT: [16] - 0x00000000000200 - V2UnitS2
+# CHECK-NEXT: [17] - 0x00000000000400 - V2UnitS3
+# CHECK-NEXT: [18] - 0x00000000207800 - V2UnitV
+# CHECK-NEXT: [19] - 0x00000000000800 - V2UnitV0
+# CHECK-NEXT: [20] - 0x00000000001000 - V2UnitV1
+# CHECK-NEXT: [21] - 0x00000000002000 - V2UnitV2
+# CHECK-NEXT: [22] - 0x00000000004000 - V2UnitV3
+# CHECK-NEXT: [23] - 0x00000000401800 - V2UnitV01
+# CHECK-NEXT: [24] - 0x00000000802800 - V2UnitV02
+# CHECK-NEXT: [25] - 0x00000001005000 - V2UnitV13
+# CHECK-NEXT: [26] - 0x00000002006000 - V2UnitV23
+
+---
+mode: latency
+key:
+  instructions:
+    - 'ADDVv4i16v H16 D16'
+  config: ''
+  register_initial_values:
+    - 'D16=0x0'
+cpu_name: neoverse-v2
+llvm_triple: aarch64
+min_instructions: 100
+measurements:
+  - { key: latency, value: 1.0, per_snippet_value: 1.0 }
+error: ''
+info: Repeating a single explicitly serial instruction
+assembled_snippet: 10E4002F10BA710E10BA710E10BA710E10BA710EC0035FD6
+...
diff --git a/llvm/test/tools/llvm-mca/processor-resource-masks-debug.s b/llvm/test/tools/llvm-mca/processor-resource-masks-debug.s
new file mode 100644
index 000000000000..afb23fb3fdb9
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/processor-resource-masks-debug.s
@@ -0,0 +1,29 @@
+# REQUIRES: asserts
+# REQUIRES: aarch64-registered-target
+
+## Detailed processor resources information must never be printed without
+## simulation, so its header is forbidden anywhere in the output.
+# RUN: llvm-mca < %s -mtriple=aarch64 -mcpu=apple-m1 -debug -instruction-tables 2>&1 \
+# RUN:   | FileCheck %s --implicit-check-not="Processor resources:"
+
+# LLVM-MCA-BEGIN foo
+add x2, x0, x1
+# LLVM-MCA-END
+
+## Print mask-only information without simulation
+# CHECK-COUNT-1: Processor resource masks:
+# CHECK-NEXT: [ 0] - 0x00000000000000 - InvalidUnit
+# CHECK-NEXT: [ 1] - 0x00000000000001 - CyUnitB
+# CHECK-NEXT: [ 2] - 0x00000000000002 - CyUnitBR
+# CHECK-NEXT: [ 3] - 0x00000000000004 - CyUnitFloatDiv
+# CHECK-NEXT: [ 4] - 0x00000000000008 - CyUnitI
+# CHECK-NEXT: [ 5] - 0x00000000000010 - CyUnitID
+# CHECK-NEXT: [ 6] - 0x00000000000020 - CyUnitIM
+# CHECK-NEXT: [ 7] - 0x00000000000040 - CyUnitIS
+# CHECK-NEXT: [ 8] - 0x00000000000080 - CyUnitIntDiv
+# CHECK-NEXT: [ 9] - 0x00000000000100 - CyUnitLS
+# CHECK-NEXT: [10] - 0x00000000000200 - CyUnitV
+# CHECK-NEXT: [11] - 0x00000000000400 - CyUnitVC
+# CHECK-NEXT: [12] - 0x00000000000800 - CyUnitVD
+# CHECK-NEXT: [13] - 0x00000000001000 - CyUnitVM
+# CHECK: [0] Code Region - foo
diff --git a/llvm/test/tools/llvm-mca/processor-resources-debug.s b/llvm/test/tools/llvm-mca/processor-resources-debug.s
new file mode 100644
index 000000000000..632516b2c2b1
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/processor-resources-debug.s
@@ -0,0 +1,29 @@
+# REQUIRES: asserts
+# REQUIRES: aarch64-registered-target
+
+## Mask-only information must never be printed on simulation, so its header is
+## forbidden anywhere in the output (a trailing CHECK-NOT would only scan the
+## text after the last positive match).
+# RUN: llvm-mca < %s -mtriple=aarch64 -mcpu=apple-m1 -debug 2>&1 \
+# RUN:   | FileCheck %s --implicit-check-not="Processor resource masks:"
+
+# LLVM-MCA-BEGIN foo
+add x2, x0, x1
+# LLVM-MCA-END
+
+# CHECK-COUNT-1: Processor resources:
+# CHECK-NEXT: [ 0] - 0x00000000000000 - InvalidUnit
+# CHECK-NEXT: [ 1] - 0x00000000000001 - CyUnitB (BufferSize=24)
+# CHECK-NEXT: [ 2] - 0x00000000000002 - CyUnitBR (BufferSize=-1)
+# CHECK-NEXT: [ 3] - 0x00000000000004 - CyUnitFloatDiv (BufferSize=-1)
+# CHECK-NEXT: [ 4] - 0x00000000000008 - CyUnitI (BufferSize=48)
+# CHECK-NEXT: [ 5] - 0x00000000000010 - CyUnitID (BufferSize=16)
+# CHECK-NEXT: [ 6] - 0x00000000000020 - CyUnitIM (BufferSize=32)
+# CHECK-NEXT: [ 7] - 0x00000000000040 - CyUnitIS (BufferSize=24)
+# CHECK-NEXT: [ 8] - 0x00000000000080 - CyUnitIntDiv (BufferSize=-1)
+# CHECK-NEXT: [ 9] - 0x00000000000100 - CyUnitLS (BufferSize=28)
+# CHECK-NEXT: [10] - 0x00000000000200 - CyUnitV (BufferSize=48)
+# CHECK-NEXT: [11] - 0x00000000000400 - CyUnitVC (BufferSize=16)
+# CHECK-NEXT: [12] - 0x00000000000800 - CyUnitVD (BufferSize=16)
+# CHECK-NEXT: [13] - 0x00000000001000 - CyUnitVM (BufferSize=32)
+# CHECK: [0] Code Region - foo
diff --git a/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp b/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp
index d6dfb65bf82e..788a03b55c5d 100644
--- a/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp
@@ -11,9 +11,12 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MCA/Support.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
 #include
 
+#define DEBUG_TYPE "exegesis-sched-class-resolution"
+
 namespace llvm {
 namespace exegesis {
 
@@ -55,6 +58,7 @@ getNonRedundantWriteProcRes(const MCSchedClassDesc &SCDesc,
   // Collect resource masks.
   SmallVector ProcResourceMasks(NumProcRes);
   mca::computeProcResourceMasks(SM, ProcResourceMasks);
+  LLVM_DEBUG(mca::dumpProcResourceMasks(SM, ProcResourceMasks));
 
   // Sort entries by smaller resources for (basic) topological ordering.
   using ResourceMaskAndEntry = std::pair;