From 15487238c4e5d433f8e2078fdf1534ae6a0e8d10 Mon Sep 17 00:00:00 2001 From: dianqk Date: Sun, 22 Feb 2026 20:20:54 +0800 Subject: [PATCH] [SPARC] Set how many bytes load from or store to stack slot (#182674) Refer from: https://reviews.llvm.org/D44782 The testcase is copied from llvm/test/CodeGen/RISCV/stack-slot-coloring.mir. --- llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 68 +++++-- llvm/lib/Target/Sparc/SparcInstrInfo.h | 8 +- .../CodeGen/SPARC/stack-slot-coloring.mir | 179 ++++++++++++++++++ 3 files changed, 233 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/SPARC/stack-slot-coloring.mir diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 34f3118f314e..6dba25af59a7 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -47,15 +47,31 @@ SparcInstrInfo::SparcInstrInfo(const SparcSubtarget &ST) /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. Register SparcInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, - int &FrameIndex) const { - if (MI.getOpcode() == SP::LDri || MI.getOpcode() == SP::LDXri || - MI.getOpcode() == SP::LDFri || MI.getOpcode() == SP::LDDFri || - MI.getOpcode() == SP::LDQFri) { - if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && - MI.getOperand(2).getImm() == 0) { - FrameIndex = MI.getOperand(1).getIndex(); - return MI.getOperand(0).getReg(); - } + int &FrameIndex, + TypeSize &MemBytes) const { + switch (MI.getOpcode()) { + default: + return 0; + case SP::LDri: + MemBytes = TypeSize::getFixed(4); + break; + case SP::LDXri: + MemBytes = TypeSize::getFixed(8); + break; + case SP::LDFri: + MemBytes = TypeSize::getFixed(4); + break; + case SP::LDDFri: + MemBytes = TypeSize::getFixed(8); + break; + case SP::LDQFri: + MemBytes = TypeSize::getFixed(16); + break; + } + if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } return 0; } @@ -66,15 +82,31 @@ Register SparcInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. Register SparcInstrInfo::isStoreToStackSlot(const MachineInstr &MI, - int &FrameIndex) const { - if (MI.getOpcode() == SP::STri || MI.getOpcode() == SP::STXri || - MI.getOpcode() == SP::STFri || MI.getOpcode() == SP::STDFri || - MI.getOpcode() == SP::STQFri) { - if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && - MI.getOperand(1).getImm() == 0) { - FrameIndex = MI.getOperand(0).getIndex(); - return MI.getOperand(2).getReg(); - } + int &FrameIndex, + TypeSize &MemBytes) const { + switch (MI.getOpcode()) { + default: + return 0; + case SP::STri: + MemBytes = TypeSize::getFixed(4); + break; + case SP::STXri: + MemBytes = TypeSize::getFixed(8); + break; + case SP::STFri: + MemBytes = TypeSize::getFixed(4); + break; + case SP::STDFri: + MemBytes = TypeSize::getFixed(8); + break; + case SP::STQFri: + MemBytes = TypeSize::getFixed(16); + break; + } + if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && + MI.getOperand(1).getImm() == 0) { + FrameIndex = MI.getOperand(0).getIndex(); + return MI.getOperand(2).getReg(); } return 0; } diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h index 033af93777ca..6e90102b8754 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -53,16 +53,16 @@ public: /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. - Register isLoadFromStackSlot(const MachineInstr &MI, - int &FrameIndex) const override; + Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const override; /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. - Register isStoreToStackSlot(const MachineInstr &MI, - int &FrameIndex) const override; + Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const override; MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; diff --git a/llvm/test/CodeGen/SPARC/stack-slot-coloring.mir b/llvm/test/CodeGen/SPARC/stack-slot-coloring.mir new file mode 100644 index 000000000000..8f6004e1349a --- /dev/null +++ b/llvm/test/CodeGen/SPARC/stack-slot-coloring.mir @@ -0,0 +1,179 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=sparcv9 -run-pass=greedy,virtregrewriter,stack-slot-coloring %s -o - | FileCheck %s +# RUN: llc -mtriple=sparc -run-pass=greedy,virtregrewriter,stack-slot-coloring %s -o - | FileCheck %s + +--- | + define dso_local i32 @main() local_unnamed_addr { + entry: + %a = alloca i64, align 4 + ret i32 0 + } +... +--- +name: main +stack: + - { id: 0, name: a, type: spill-slot, offset: 0, size: 8, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0.entry: + ; CHECK-LABEL: name: main + ; CHECK: $i0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $i1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $i3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $i4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $i5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $i6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $i7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $g7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $l7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: $o7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: renamable $i2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: STXri %stack.1, 0, killed renamable $i2 :: (store (s64) into %stack.1) + ; CHECK-NEXT: renamable $i2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: STXri %stack.0.a, 0, killed renamable $i2 :: (store (s64) into %ir.a) + ; CHECK-NEXT: renamable $i2 = LDXri %stack.1, 0 :: (load (s64) from %stack.1) + ; CHECK-NEXT: STri %stack.0.a, 0, killed renamable $i2 :: (store (s32) into %ir.a) + ; CHECK-NEXT: renamable $i2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: STXri %stack.1, 0, killed renamable $i2 :: (store (s64) into %stack.1) + ; CHECK-NEXT: renamable $i2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + ; CHECK-NEXT: STXri %stack.0.a, 0, killed renamable $i2 :: (store (s64) into %ir.a) + ; CHECK-NEXT: renamable $i2 = LDXri %stack.1, 0 :: (load (s64) from %stack.1) + ; CHECK-NEXT: STri %stack.0.a, 0, killed renamable $i2 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i0 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i1 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i3 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i4 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i5 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i6 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $i7 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g0 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g1 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g2 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g3 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g4 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g5 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g6 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $g7 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l0 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l1 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l2 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l3 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l4 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l5 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l6 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $l7 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o0 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o1 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o2 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o3 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o4 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o5 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o6 :: (store (s32) into %ir.a) + ; CHECK-NEXT: STri %stack.0.a, 0, $o7 :: (store (s32) into %ir.a) + ; CHECK-NEXT: RETL 8, implicit $i0 + $i0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $i1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $i3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $i4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $i5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $i6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $i7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $g7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $l7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o0 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o1 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o2 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o3 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o4 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o5 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o6 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + $o7 = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + + ; First vreg load + %1:i64regs = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + + ; First faulty sequence; %1 spilt + %12:i64regs = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + STXri %stack.0.a, 0, %12 :: (store (s64) into %ir.a) + + ; Store %1 to avoid it being optimised out, will result in a load-from-spill + STri %stack.0.a, 0, %1 :: (store (s32) into %ir.a) + + ; That code sequence a second time, to generate a second spill slot that + ; will get coloured and merged. + %2:i64regs = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + + %22:i64regs = LDri %stack.0.a, 0 :: (load (s32) from %ir.a) + STXri %stack.0.a, 0, %22 :: (store (s64) into %ir.a) + + STri %stack.0.a, 0, %2 :: (store (s32) into %ir.a) + + STri %stack.0.a, 0, $i0 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $i1 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $i3 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $i4 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $i5 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $i6 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $i7 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g0 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g1 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g2 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g3 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g4 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g5 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g6 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $g7 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l0 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l1 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l2 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l3 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l4 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l5 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l6 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $l7 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o0 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o1 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o2 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o3 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o4 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o5 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o6 :: (store (s32) into %ir.a) + STri %stack.0.a, 0, $o7 :: (store (s32) into %ir.a) + RETL 8, implicit $i0 +...