[RISCV] Add initial assembler/MC layer support for big-endian (#146534)

This patch adds basic assembler and MC layer infrastructure for
RISC-V big-endian targets (riscv32be/riscv64be):
      - Register big-endian targets in RISCVTargetMachine
      - Add big-endian data layout strings
      - Implement endianness-aware fixup application in the assembler
        backend
      - Add byte swapping for data fixups on BE cores
      - Update MC layer components (AsmInfo, MCTargetDesc, Disassembler,
        AsmParser)
    
This provides the foundation for BE support but does not yet include:
      - Codegen patterns for BE
      - Load/store instruction handling
      - BE-specific subtarget features
This commit is contained in:
Djordje Todorovic 2025-08-22 09:21:10 +02:00 committed by GitHub
parent a2f542b7a5
commit 5050da7ba1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 307 additions and 23 deletions

View File

@ -2340,6 +2340,9 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
getAssembler().getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
// RISC-V instructions are always little-endian, even on BE systems.
bool ForceLE = getContext().getTargetTriple().isRISCV();
// If we are showing fixups, create symbolic markers in the encoded
// representation. We do this by making a per-bit map to the fixup item index,
// then trying to display it as nicely as possible.
@ -2394,7 +2397,10 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
unsigned Bit = (Code[i] >> j) & 1;
unsigned FixupBit;
if (MAI->isLittleEndian())
// RISC-V instructions are always little-endian.
// The FixupMap is indexed by actual bit positions in the LE
// instruction.
if (MAI->isLittleEndian() || ForceLE)
FixupBit = i * 8 + j;
else
FixupBit = i * 8 + (7-j);

View File

@ -4065,4 +4065,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVAsmParser() {
RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
RegisterMCAsmParser<RISCVAsmParser> A(getTheRISCV32beTarget());
RegisterMCAsmParser<RISCVAsmParser> B(getTheRISCV64beTarget());
}

View File

@ -74,6 +74,10 @@ LLVMInitializeRISCVDisassembler() {
createRISCVDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheRISCV64Target(),
createRISCVDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheRISCV32beTarget(),
createRISCVDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheRISCV64beTarget(),
createRISCVDisassembler);
}
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,

View File

@ -38,9 +38,11 @@ static cl::opt<bool>
"bytes of NOPs even in norvc code"));
RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI,
bool Is64Bit, const MCTargetOptions &Options)
: MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
Is64Bit(Is64Bit), TargetOptions(Options) {
bool Is64Bit, bool IsLittleEndian,
const MCTargetOptions &Options)
: MCAsmBackend(IsLittleEndian ? llvm::endianness::little
: llvm::endianness::big),
STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
}
@ -374,7 +376,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
} else {
PCBytes = 2;
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
support::endian::write<uint16_t>(OS, 0, Endian);
}
auto Offset = OS.tell() - PCBytes;
@ -428,15 +430,15 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const {
AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6});
} else if (isUInt<8>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
support::endian::write<uint8_t>(OS, 0, llvm::endianness::little);
support::endian::write<uint8_t>(OS, 0, Endian);
AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8});
} else if (isUInt<16>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
support::endian::write<uint16_t>(OS, 0, Endian);
AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16});
} else if (isUInt<32>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
support::endian::write<uint32_t>(OS, 0, llvm::endianness::little);
support::endian::write<uint32_t>(OS, 0, Endian);
AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32});
} else {
llvm_unreachable("unsupported CFA encoding");
@ -909,6 +911,22 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
return false;
}
// Returns true when Kind is one of the generic fixed-width data fixups
// (FK_Data_1, FK_Data_2, FK_Data_4 or FK_Data_8) and false for every other
// kind.
//
// Callers use this to decide whether a fixup value has to be byte-swapped on
// big-endian cores: data is stored in the target's byte order, whereas RISC-V
// instructions are always little-endian and must never be swapped.
static bool isDataFixup(unsigned Kind) {
  return Kind == FK_Data_1 || Kind == FK_Data_2 || Kind == FK_Data_4 ||
         Kind == FK_Data_8;
}
void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
@ -932,8 +950,11 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
// For big endian cores, data fixup should be swapped.
bool SwapValue = Endian == llvm::endianness::big && isDataFixup(Kind);
for (unsigned i = 0; i != NumBytes; ++i) {
Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
unsigned Idx = SwapValue ? (NumBytes - 1 - i) : i;
Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
@ -948,5 +969,6 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), TT.isLittleEndian(),
Options);
}

View File

@ -35,7 +35,7 @@ class RISCVAsmBackend : public MCAsmBackend {
public:
RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options);
bool IsLittleEndian, const MCTargetOptions &Options);
~RISCVAsmBackend() override = default;
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,

View File

@ -21,6 +21,7 @@ using namespace llvm;
void RISCVMCAsmInfo::anchor() {}
RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
IsLittleEndian = TT.isLittleEndian();
CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
CommentString = "#";
AlignmentIsInBytes = false;

View File

@ -376,7 +376,8 @@ static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target(),
&getTheRISCV32beTarget(), &getTheRISCV64beTarget()}) {
TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);

View File

@ -611,6 +611,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVAsmPrinter() {
RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target());
RegisterAsmPrinter<RISCVAsmPrinter> Y(getTheRISCV64Target());
RegisterAsmPrinter<RISCVAsmPrinter> A(getTheRISCV32beTarget());
RegisterAsmPrinter<RISCVAsmPrinter> B(getTheRISCV64beTarget());
}
void RISCVAsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {

View File

@ -106,6 +106,8 @@ static cl::opt<bool>
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
RegisterTargetMachine<RISCVTargetMachine> A(getTheRISCV32beTarget());
RegisterTargetMachine<RISCVTargetMachine> B(getTheRISCV64beTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
initializeRISCVO0PreLegalizerCombinerPass(*PR);
@ -139,21 +141,37 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVAsmPrinterPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT,
const TargetOptions &Options) {
StringRef ABIName = Options.MCOptions.getABIName();
static std::string computeDataLayout(const Triple &TT,
const TargetOptions &Opts) {
std::string Ret;
if (TT.isLittleEndian())
Ret += "e";
else
Ret += "E";
Ret += "-m:e";
// Pointer and integer sizes.
if (TT.isArch64Bit()) {
if (ABIName == "lp64e")
return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64";
return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
}
Ret += "-p:64:64-i64:64-i128:128";
Ret += "-n32:64";
} else {
assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
Ret += "-p:32:32-i64:64";
Ret += "-n32";
}
if (ABIName == "ilp32e")
return "e-m:e-p:32:32-i64:64-n32-S32";
// Stack alignment based on ABI.
StringRef ABI = Opts.MCOptions.getABIName();
if (ABI == "ilp32e")
Ret += "-S32";
else if (ABI == "lp64e")
Ret += "-S64";
else
Ret += "-S128";
return "e-m:e-p:32:32-i64:64-n32-S128";
return Ret;
}
static Reloc::Model getEffectiveRelocModel(const Triple &TT,

View File

@ -21,10 +21,24 @@ Target &llvm::getTheRISCV64Target() {
return TheRISCV64Target;
}
// Accessor for the Target object that represents 32-bit big-endian RISC-V.
// The object is a function-local static, so every call hands back a reference
// to the same instance.
Target &llvm::getTheRISCV32beTarget() {
  static Target RV32BigEndianTarget;
  return RV32BigEndianTarget;
}
// Accessor for the Target object that represents 64-bit big-endian RISC-V.
// The object is a function-local static, so every call hands back a reference
// to the same instance.
Target &llvm::getTheRISCV64beTarget() {
  static Target RV64BigEndianTarget;
  return RV64BigEndianTarget;
}
// Registers all four RISC-V targets (32/64-bit, little/big-endian) by
// constructing one RegisterTarget object per target. Each registration ties a
// Target object to its triple architecture, its short name, a human-readable
// description and the "RISCV" backend name.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVTargetInfo() {
  // Little-endian targets explicitly request HasJIT=true.
  RegisterTarget<Triple::riscv32, /*HasJIT=*/true> RegisterRV32(
      getTheRISCV32Target(), "riscv32", "32-bit RISC-V", "RISCV");
  RegisterTarget<Triple::riscv64, /*HasJIT=*/true> RegisterRV64(
      getTheRISCV64Target(), "riscv64", "64-bit RISC-V", "RISCV");

  // Big-endian targets do not pass a HasJIT argument, so RegisterTarget's
  // template default applies to them.
  RegisterTarget<Triple::riscv32be> RegisterRV32BE(
      getTheRISCV32beTarget(), "riscv32be", "32-bit big endian RISC-V",
      "RISCV");
  RegisterTarget<Triple::riscv64be> RegisterRV64BE(
      getTheRISCV64beTarget(), "riscv64be", "64-bit big endian RISC-V",
      "RISCV");
}

View File

@ -15,6 +15,8 @@ class Target;
Target &getTheRISCV32Target();
Target &getTheRISCV64Target();
Target &getTheRISCV32beTarget();
Target &getTheRISCV64beTarget();
} // namespace llvm

View File

@ -0,0 +1,29 @@
# RUN: llvm-mc --disassemble %s -triple=riscv32be -mattr=+c 2>&1 | FileCheck %s
# RUN: llvm-mc --disassemble %s -triple=riscv64be -mattr=+c 2>&1 | FileCheck %s
# Test basic disassembly for big-endian RISC-V
# Instructions are always little-endian encoded in RISC-V
[0x13,0x05,0x45,0x06]
# CHECK: addi a0, a0, 100
[0xb7,0x52,0x34,0x12]
# CHECK: lui t0, 74565
[0x03,0x26,0x05,0x00]
# CHECK: lw a2, 0(a0)
[0x23,0x22,0xc5,0x00]
# CHECK: sw a2, 4(a0)
[0xef,0x00,0x00,0x00]
# CHECK: jal 0
[0x63,0x00,0xb5,0x00]
# CHECK: beq a0, a1, 0
[0x01,0x00]
# CHECK: nop
[0x05,0x05]
# CHECK: addi a0, a0, 1

View File

@ -0,0 +1,101 @@
# RUN: llvm-mc --triple=riscv32be %s --show-encoding \
# RUN: | FileCheck --check-prefixes=CHECK-FIXUP,CHECK-ENCODING %s
# RUN: llvm-mc --filetype=obj --triple=riscv32be %s \
# RUN: | llvm-objdump -d - | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-mc --filetype=obj --triple=riscv32be %s \
# RUN: | llvm-readobj -r - | FileCheck --check-prefix=CHECK-REL %s
# RUN: llvm-mc --triple=riscv64be %s --show-encoding \
# RUN: | FileCheck --check-prefixes=CHECK-FIXUP,CHECK-ENCODING %s
# RUN: llvm-mc --filetype=obj --triple=riscv64be %s \
# RUN: | llvm-objdump -d - | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-mc --filetype=obj --triple=riscv64be %s \
# RUN: | llvm-readobj -r - | FileCheck --check-prefix=CHECK-REL %s
## Checks that fixups that can be resolved within the same object file are
## applied correctly on big-endian RISC-V targets.
##
## This test verifies that RISC-V instructions remain little-endian even on
## big-endian systems. This is a fundamental property of RISC-V:
## - Instructions are always little-endian
## - Data can be big-endian or little-endian depending on the system
.LBB0:
addi t0, t0, 1
# CHECK-ENCODING: encoding: [0x93,0x82,0x12,0x00]
# CHECK-INSTR: addi t0, t0, 0x1
lui t1, %hi(val)
# CHECK-ENCODING: encoding: [0x37,0bAAAA0011,A,A]
# CHECK-FIXUP: fixup A - offset: 0, value: %hi(val), kind: fixup_riscv_hi20
# CHECK-INSTR: lui t1, 0x12345
lw a0, %lo(val)(t1)
# CHECK-ENCODING: encoding: [0x03,0x25,0bAAAA0011,A]
# CHECK-FIXUP: fixup A - offset: 0, value: %lo(val), kind: fixup_riscv_lo12_i
# CHECK-INSTR: lw a0, 0x678(t1)
addi a1, t1, %lo(val)
# CHECK-ENCODING: encoding: [0x93,0x05,0bAAAA0011,A]
# CHECK-FIXUP: fixup A - offset: 0, value: %lo(val), kind: fixup_riscv_lo12_i
# CHECK-INSTR: addi a1, t1, 0x678
sw a0, %lo(val)(t1)
# CHECK-ENCODING: encoding: [0x23'A',0x20'A',0xa3'A',A]
# CHECK-FIXUP: fixup A - offset: 0, value: %lo(val), kind: fixup_riscv_lo12_s
# CHECK-INSTR: sw a0, 0x678(t1)
1:
auipc t1, %pcrel_hi(.LBB0)
# CHECK-ENCODING: encoding: [0x17,0bAAAA0011,A,A]
# CHECK-FIXUP: fixup A - offset: 0, value: %pcrel_hi(.LBB0), kind: fixup_riscv_pcrel_hi20
# CHECK-INSTR: auipc t1, 0
addi t1, t1, %pcrel_lo(1b)
# CHECK-ENCODING: encoding: [0x13,0x03,0bAAAA0011,A]
# CHECK-FIXUP: fixup A - offset: 0, value: %pcrel_lo({{.*}}), kind: fixup_riscv_pcrel_lo12_i
# CHECK-INSTR: addi t1, t1, -0x14
sw t1, %pcrel_lo(1b)(t1)
# CHECK-ENCODING: encoding: [0x23'A',0x20'A',0x63'A',A]
# CHECK-FIXUP: fixup A - offset: 0, value: %pcrel_lo({{.*}}), kind: fixup_riscv_pcrel_lo12_s
# CHECK-INSTR: sw t1, -0x14(t1)
jal zero, .LBB0
# CHECK-ENCODING: encoding: [0x6f,0bAAAA0000,A,A]
# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_riscv_jal
# CHECK-INSTR: j 0x0 <.text>
jal zero, .LBB2
# CHECK-ENCODING: encoding: [0x6f,0bAAAA0000,A,A]
# CHECK-FIXUP: fixup A - offset: 0, value: .LBB2, kind: fixup_riscv_jal
# CHECK-INSTR: j 0x50d18 <.text+0x50d18>
beq a0, a1, .LBB0
# CHECK-ENCODING: encoding: [0x63'A',A,0xb5'A',A]
# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_riscv_branch
# CHECK-INSTR: beq a0, a1, 0x0 <.text>
blt a0, a1, .LBB1
# CHECK-ENCODING: encoding: [0x63'A',0x40'A',0xb5'A',A]
# CHECK-FIXUP: fixup A - offset: 0, value: .LBB1, kind: fixup_riscv_branch
# CHECK-INSTR: blt a0, a1, 0x480 <.text+0x480>
.fill 1104
.LBB1:
.fill 329876
addi zero, zero, 0
.LBB2:
.set val, 0x12345678
# CHECK-REL-NOT: R_RISCV
.data
.align 3
data_label:
.word val # On BE: 0x12345678 stored as [0x12, 0x34, 0x56, 0x78]
.long val # On BE: 0x12345678 stored as [0x12, 0x34, 0x56, 0x78]
.quad val # On BE: 0x0000000012345678 stored as [0x00, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78]

View File

@ -0,0 +1,36 @@
# RUN: llvm-mc -filetype=obj -triple=riscv32be %s -o %t.32be.o
# RUN: llvm-objdump -s %t.32be.o | FileCheck -check-prefix=RV32BE %s
# RUN: llvm-mc -filetype=obj -triple=riscv64be %s -o %t.64be.o
# RUN: llvm-objdump -s %t.64be.o | FileCheck -check-prefix=RV64BE %s
# RUN: llvm-mc -filetype=obj -triple=riscv32 %s -o %t.32le.o
# RUN: llvm-objdump -s %t.32le.o | FileCheck -check-prefix=RV32LE %s
# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.64le.o
# RUN: llvm-objdump -s %t.64le.o | FileCheck -check-prefix=RV64LE %s
# Test that data directives are properly byte-swapped on big-endian RISC-V
.data
byte_data:
.byte 0x11
.byte 0x22
.half 0x3344
.word 0x55667788
.long 0x99aabbcc
.quad 0x1122334455667788
# RV32BE: Contents of section .data:
# RV32BE-NEXT: 0000 11223344 55667788 99aabbcc 11223344
# RV32BE-NEXT: 0010 55667788
# RV64BE: Contents of section .data:
# RV64BE-NEXT: 0000 11223344 55667788 99aabbcc 11223344
# RV64BE-NEXT: 0010 55667788
# RV32LE: Contents of section .data:
# RV32LE-NEXT: 0000 11224433 88776655 ccbbaa99 88776655
# RV32LE-NEXT: 0010 44332211
# RV64LE: Contents of section .data:
# RV64LE-NEXT: 0000 11224433 88776655 ccbbaa99 88776655
# RV64LE-NEXT: 0010 44332211

View File

@ -0,0 +1,46 @@
# RUN: llvm-mc %s -filetype=obj -triple=riscv32be | llvm-readobj -h - \
# RUN: | FileCheck -check-prefix=RV32BE %s
# RUN: llvm-mc %s -filetype=obj -triple=riscv64be | llvm-readobj -h - \
# RUN: | FileCheck -check-prefix=RV64BE %s
# Test that RISC-V big-endian targets produce correct ELF headers
# RV32BE: Format: elf32-bigriscv
# RV32BE: Arch: riscv32
# RV32BE: AddressSize: 32bit
# RV32BE: ElfHeader {
# RV32BE: Ident {
# RV32BE: Magic: (7F 45 4C 46)
# RV32BE: Class: 32-bit (0x1)
# RV32BE: DataEncoding: BigEndian (0x2)
# RV32BE: FileVersion: 1
# RV32BE: OS/ABI: SystemV (0x0)
# RV32BE: ABIVersion: 0
# RV32BE: }
# RV32BE: Type: Relocatable (0x1)
# RV32BE: Machine: EM_RISCV (0xF3)
# RV32BE: Version: 1
# RV32BE: Flags [ (0x0)
# RV32BE: ]
# RV32BE: }
# RV64BE: Format: elf64-bigriscv
# RV64BE: Arch: riscv64
# RV64BE: AddressSize: 64bit
# RV64BE: ElfHeader {
# RV64BE: Ident {
# RV64BE: Magic: (7F 45 4C 46)
# RV64BE: Class: 64-bit (0x2)
# RV64BE: DataEncoding: BigEndian (0x2)
# RV64BE: FileVersion: 1
# RV64BE: OS/ABI: SystemV (0x0)
# RV64BE: ABIVersion: 0
# RV64BE: }
# RV64BE: Type: Relocatable (0x1)
# RV64BE: Machine: EM_RISCV (0xF3)
# RV64BE: Version: 1
# RV64BE: Flags [ (0x0)
# RV64BE: ]
# RV64BE: }
nop