Currently, when llvm-objdump is disassembling a code section and encounters a point where no instruction can be decoded, it uses the same policy on all targets: consume one byte of the section, emit it as "<unknown>", and try disassembling from the next byte position.

On an architecture where instructions are always 4 bytes long and 4-byte aligned, this makes no sense at all. If a 4-byte word cannot be decoded as an instruction, then the next place that a valid instruction could *possibly* be found is 4 bytes further on. Disassembling from a misaligned address can't possibly produce anything that the code generator intended, or that the CPU would even attempt to execute.

This patch introduces a new MCDisassembler virtual method called `suggestBytesToSkip`, which allows each target to choose its own resynchronization policy. For Arm (as opposed to Thumb) and AArch64, I've filled in the new method to return a fixed width of 4.

Thumb is a more interesting case, because the criterion for identifying 2-byte and 4-byte instruction encodings is very simple, and doesn't require the particular instruction to be recognized. So `suggestBytesToSkip` is also passed an ArrayRef of the bytes in question, so that it can take that into account. The new test case shows Thumb disassembly skipping over two unrecognized instructions, and identifying one as 2-byte and one as 4-byte.

For targets other than Arm and AArch64, this is NFC: the base class implementation of `suggestBytesToSkip` still returns 1, so that the existing behavior is unchanged. Other targets can fill in their own implementations as they see fit; I haven't attempted to choose a new behavior for each one myself.

I've updated all the call sites of `MCDisassembler::getInstruction` in llvm-objdump, and also one in sancov, which was the only other place I spotted the same idiom of `if (Size == 0) Size = 1` after a call to `getInstruction`.

Reviewed By: DavidSpickett

Differential Revision: https://reviews.llvm.org/D130357
102 lines
3.4 KiB
C++
102 lines
3.4 KiB
C++
//===- MCDisassembler.cpp - Disassembler interface ------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
using namespace llvm;
|
|
|
|
// Out-of-line definition of the destructor; the compiler-generated behavior
// is used.
MCDisassembler::~MCDisassembler() = default;
|
|
|
|
/// Base-class implementation of the symbol-start hook. It ignores every
/// argument and always returns None.
Optional<MCDisassembler::DecodeStatus>
MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                              ArrayRef<uint8_t> Bytes, uint64_t Address,
                              raw_ostream &CStream) const {
  return None;
}
|
|
|
|
/// Base-class resynchronization policy: suggest skipping a single byte,
/// regardless of the bytes or the address supplied.
uint64_t MCDisassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
                                            uint64_t Address) const {
  return 1;
}
|
|
|
|
/// Forward a symbolic-operand request to the installed symbolizer.
///
/// \returns false when no symbolizer is set; otherwise whatever the
/// symbolizer's tryAddingSymbolicOperand returns.
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
                                              uint64_t Address, bool IsBranch,
                                              uint64_t Offset, uint64_t OpSize,
                                              uint64_t InstSize) const {
  // Without a symbolizer there is nothing to delegate to.
  if (!Symbolizer)
    return false;

  // The symbolizer additionally receives the comment stream.
  return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value,
                                              Address, IsBranch, Offset,
                                              OpSize, InstSize);
}
|
|
|
|
void MCDisassembler::tryAddingPcLoadReferenceComment(int64_t Value,
|
|
uint64_t Address) const {
|
|
if (Symbolizer)
|
|
Symbolizer->tryAddingPcLoadReferenceComment(*CommentStream, Value, Address);
|
|
}
|
|
|
|
/// Install \p Symzer as this disassembler's symbolizer by move-assigning it
/// into the Symbolizer member.
void MCDisassembler::setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer) {
  Symbolizer = std::move(Symzer);
}
|
|
|
|
#define SMC_PCASE(A, P) \
|
|
case XCOFF::XMC_##A: \
|
|
return P;
|
|
|
|
static uint8_t getSMCPriority(XCOFF::StorageMappingClass SMC) {
|
|
switch (SMC) {
|
|
SMC_PCASE(PR, 1)
|
|
SMC_PCASE(RO, 1)
|
|
SMC_PCASE(DB, 1)
|
|
SMC_PCASE(GL, 1)
|
|
SMC_PCASE(XO, 1)
|
|
SMC_PCASE(SV, 1)
|
|
SMC_PCASE(SV64, 1)
|
|
SMC_PCASE(SV3264, 1)
|
|
SMC_PCASE(TI, 1)
|
|
SMC_PCASE(TB, 1)
|
|
SMC_PCASE(RW, 1)
|
|
SMC_PCASE(TC0, 0)
|
|
SMC_PCASE(TC, 1)
|
|
SMC_PCASE(TD, 1)
|
|
SMC_PCASE(DS, 1)
|
|
SMC_PCASE(UA, 1)
|
|
SMC_PCASE(BS, 1)
|
|
SMC_PCASE(UC, 1)
|
|
SMC_PCASE(TL, 1)
|
|
SMC_PCASE(UL, 1)
|
|
SMC_PCASE(TE, 1)
|
|
#undef SMC_PCASE
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/// The function is for symbol sorting when symbols have the same address.
|
|
/// The symbols in the same section are sorted in ascending order.
|
|
/// llvm-objdump -D will choose the highest priority symbol to display when
|
|
/// there are symbols with the same address.
|
|
bool XCOFFSymbolInfo::operator<(const XCOFFSymbolInfo &SymInfo) const {
|
|
// Label symbols have higher priority than non-label symbols.
|
|
if (IsLabel != SymInfo.IsLabel)
|
|
return SymInfo.IsLabel;
|
|
|
|
// Symbols with a StorageMappingClass have higher priority than those without.
|
|
if (StorageMappingClass.has_value() !=
|
|
SymInfo.StorageMappingClass.has_value())
|
|
return SymInfo.StorageMappingClass.has_value();
|
|
|
|
if (StorageMappingClass) {
|
|
return getSMCPriority(StorageMappingClass.value()) <
|
|
getSMCPriority(SymInfo.StorageMappingClass.value());
|
|
}
|
|
|
|
return false;
|
|
}
|