
LLDB uses the LLVM disassembler to determine the size of instructions and to do the actual disassembly. Currently, if the LLVM disassembler can't disassemble an instruction, LLDB will ignore the instruction size, assume the instruction size is the minimum size for that device, print no useful opcode, and print nothing for the instruction. This patch changes this behavior to separate the instruction size from "can't disassemble". If the LLVM disassembler knows the size, but can't disassemble the instruction, LLDB will use that size. It will print out the opcode, and will print "<unknown>" for the instruction. This is much more useful to both a user and a script. The impetus behind this change is to clean up RISC-V disassembly when the LLVM disassembler doesn't understand all of the instructions. RISC-V supports proprietary extensions, where the TD files don't know about certain instructions, and the disassembler can't disassemble them. Internal users want to be able to disassemble these instructions. With llvm-objdump, the solution is to pipe the output of the disassembly through a filter program. This patch modifies LLDB's disassembly to look more like llvm-objdump's, and includes an example Python script that adds a command "fdis" that will disassemble, then pipe the output through a specified filter program. This has been tested with crustfilt, a sample filter located at https://github.com/quic/crustfilt . Changes in this PR: - Decouple "can't disassemble" from "instruction size". DisassemblerLLVMC::MCDisasmInstance::GetMCInst now returns a bool for valid disassembly, and has the size as an out parameter. Use the size even if the disassembly is invalid. Disassemble if disassembly is valid. - Always print out the opcode when -b is specified. Previously it wouldn't print out the opcode if it couldn't disassemble. - Print out RISC-V opcodes the way llvm-objdump does. Code for the new Opcode Type eType16_32Tuples by Jason Molenda. 
- Print <unknown> for instructions that can't be disassembled, matching llvm-objdump, instead of printing nothing. - Update max riscv32 and riscv64 instruction size to 8. - Add example "fdis" command script. - Added disassembly byte test for x86 with known and unknown instructions. - Added disassembly byte test for riscv32 with known and unknown instructions, with and without filtering. - Added test from Jason Molenda to RISC-V disassembly unit tests.
166 lines
4.6 KiB
C++
166 lines
4.6 KiB
C++
//===-- Opcode.cpp --------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "lldb/Core/Opcode.h"
|
|
|
|
#include "lldb/Utility/DataBufferHeap.h"
|
|
#include "lldb/Utility/DataExtractor.h"
|
|
#include "lldb/Utility/Endian.h"
|
|
#include "lldb/Utility/Stream.h"
|
|
#include "lldb/lldb-forward.h"
|
|
|
|
#include <memory>
|
|
|
|
#include <cinttypes>
|
|
|
|
using namespace lldb;
|
|
using namespace lldb_private;
|
|
|
|
/// Write a textual rendering of this opcode to \a s.
///
/// The rendering depends on the opcode type: fixed-width integer opcodes are
/// printed as a single "0x"-prefixed hex value, tuple opcodes are printed as
/// space-separated 32-bit or 16-bit hex groups, and byte opcodes are printed
/// as space-separated hex bytes. An invalid opcode prints "<invalid>".
///
/// \param[in] s
///     Stream to write to; it is dereferenced unconditionally.
///
/// \param[in] min_byte_width
///     Minimum number of characters to emit. If the rendering is shorter, it
///     is padded on the right with spaces up to this width.
///
/// \return
///     The number of bytes written to \a s by this call, padding included.
int Opcode::Dump(Stream *s, uint32_t min_byte_width) const {
  const uint32_t previous_bytes = s->GetWrittenBytes();

  switch (m_type) {
  case Opcode::eTypeInvalid:
    s->PutCString("<invalid>");
    break;

  case Opcode::eType8:
    s->Printf("0x%2.2x", m_data.inst8);
    break;

  case Opcode::eType16:
    s->Printf("0x%4.4x", m_data.inst16);
    break;

  // Both of these are rendered as one 32-bit value.
  case Opcode::eType16_2:
  case Opcode::eType32:
    s->Printf("0x%8.8x", m_data.inst32);
    break;

  case Opcode::eType16_32Tuples: {
    const auto &inst = m_data.inst;
    // A length that is a multiple of four is shown as 32-bit words; any
    // other length is shown as 16-bit halfwords.
    const uint32_t stride = ((inst.length % 4) == 0) ? 4 : 2;
    for (uint32_t offset = 0; offset < inst.length; offset += stride) {
      if (offset != 0)
        s->PutChar(' ');
      // Within each group the highest-indexed byte is printed first.
      if (stride == 4)
        s->Printf("%2.2x%2.2x%2.2x%2.2x", inst.bytes[offset + 3],
                  inst.bytes[offset + 2], inst.bytes[offset + 1],
                  inst.bytes[offset + 0]);
      else
        s->Printf("%2.2x%2.2x", inst.bytes[offset + 1],
                  inst.bytes[offset + 0]);
    }
    break;
  }

  case Opcode::eType64:
    s->Printf("0x%16.16" PRIx64, m_data.inst64);
    break;

  case Opcode::eTypeBytes: {
    const auto &inst = m_data.inst;
    uint32_t index = 0;
    while (index < inst.length) {
      if (index != 0)
        s->PutChar(' ');
      s->Printf("%2.2x", inst.bytes[index]);
      ++index;
    }
    break;
  }
  }

  // Pad with spaces so that the opcode column lines up even when opcodes
  // differ in size.
  const uint32_t rendered_width = s->GetWrittenBytes() - previous_bytes;
  if (rendered_width < min_byte_width) {
    const uint32_t padding = min_byte_width - rendered_width;
    s->Printf("%*s", padding, "");
  }
  return s->GetWrittenBytes() - previous_bytes;
}
|
|
|
|
/// Report the byte order of this opcode's stored data.
///
/// \return
///     The explicitly recorded byte order (m_byte_order) when it is not
///     eByteOrderInvalid. Otherwise the host byte order for the integer-like
///     opcode types (eType8, eType16, eType16_2, eType16_32Tuples, eType32,
///     eType64), and eByteOrderInvalid for eTypeInvalid and eTypeBytes.
lldb::ByteOrder Opcode::GetDataByteOrder() const {
  // A recorded byte order takes precedence over anything derived from the
  // opcode type.
  if (m_byte_order != eByteOrderInvalid)
    return m_byte_order;

  switch (m_type) {
  case Opcode::eType8:
  case Opcode::eType16:
  case Opcode::eType16_2:
  case Opcode::eType16_32Tuples:
  case Opcode::eType32:
  case Opcode::eType64:
    return endian::InlHostByteOrder();

  case Opcode::eTypeInvalid:
  case Opcode::eTypeBytes:
    return eByteOrderInvalid;
  }

  // Reached only if m_type holds a value outside the enumerators above.
  return eByteOrderInvalid;
}
|
|
|
|
/// Copy this opcode's bytes into \a data.
///
/// The bytes are taken either directly from the opcode storage or from a
/// local scratch buffer holding a reordered copy, depending on the opcode
/// type and on what GetEndianSwap() reports. On success a new heap buffer
/// containing GetByteSize() bytes is installed in \a data, together with the
/// byte order returned by GetDataByteOrder().
///
/// \param[out] data
///     Receives the opcode bytes. It is cleared when there is nothing to
///     copy (zero byte size or an invalid opcode type).
///
/// \return
///     The number of bytes placed in \a data, or 0 if \a data was cleared.
uint32_t Opcode::GetData(DataExtractor &data) const {
  uint32_t byte_size = GetByteSize();
  // Scratch space for reordered bytes. Aligned for the widest typed store
  // performed below.
  alignas(uint64_t) uint8_t swap_buf[8];
  const void *buf = nullptr;

  if (byte_size > 0) {
    if (!GetEndianSwap()) {
      if (m_type == Opcode::eType16_2) {
        // 32 bit thumb instruction, we need to swizzle this a bit: the two
        // 16-bit halves are exchanged, bytes within each half keep their
        // order.
        swap_buf[0] = m_data.inst.bytes[2];
        swap_buf[1] = m_data.inst.bytes[3];
        swap_buf[2] = m_data.inst.bytes[0];
        swap_buf[3] = m_data.inst.bytes[1];
        buf = swap_buf;
      } else {
        buf = GetOpcodeDataBytes();
      }
    } else {
      switch (m_type) {
      case Opcode::eTypeInvalid:
        break;
      case Opcode::eType8:
        buf = GetOpcodeDataBytes();
        break;
      case Opcode::eType16:
        *(uint16_t *)swap_buf = llvm::byteswap<uint16_t>(m_data.inst16);
        buf = swap_buf;
        break;
      case Opcode::eType16_2:
        // Swap the bytes inside each 16-bit half; the halves stay in place.
        swap_buf[0] = m_data.inst.bytes[1];
        swap_buf[1] = m_data.inst.bytes[0];
        swap_buf[2] = m_data.inst.bytes[3];
        swap_buf[3] = m_data.inst.bytes[2];
        buf = swap_buf;
        break;
      case Opcode::eType16_32Tuples:
        buf = GetOpcodeDataBytes();
        break;
      case Opcode::eType32:
        *(uint32_t *)swap_buf = llvm::byteswap<uint32_t>(m_data.inst32);
        buf = swap_buf;
        break;
      case Opcode::eType64:
        // Store all 64 bits. Writing through a uint32_t pointer here would
        // truncate the swapped value and leave swap_buf[4..7] uninitialized.
        *(uint64_t *)swap_buf = llvm::byteswap<uint64_t>(m_data.inst64);
        buf = swap_buf;
        break;
      case Opcode::eTypeBytes:
        buf = GetOpcodeDataBytes();
        break;
      }
    }
  }

  if (buf != nullptr) {
    // DataBufferHeap is constructed from (buf, byte_size), so swap_buf does
    // not need to outlive this call -- presumably it copies; confirm against
    // DataBufferHeap.
    DataBufferSP buffer_sp = std::make_shared<DataBufferHeap>(buf, byte_size);
    data.SetByteOrder(GetDataByteOrder());
    data.SetData(buffer_sp);
    return byte_size;
  }

  data.Clear();
  return 0;
}
|