From 28008a746a31abb7909dd86cb0cd413ac8943b0b Mon Sep 17 00:00:00 2001 From: jmpoep Date: Thu, 7 Dec 2023 16:51:07 +0800 Subject: first commit --- utils/x86disasm/disasm.cc | 295 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 utils/x86disasm/disasm.cc (limited to 'utils/x86disasm/disasm.cc') diff --git a/utils/x86disasm/disasm.cc b/utils/x86disasm/disasm.cc new file mode 100644 index 0000000..bd8067b --- /dev/null +++ b/utils/x86disasm/disasm.cc @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "disasm.h" +#include "../../third-party/libudis86/extern.h" + +const size_t kBufSize = 20; + +typedef unsigned char uint8_t; + +static void GenerateBuffer(std::vector * buf) +{ + size_t i; + buf->resize(kBufSize); + for (i = 0; i < kBufSize; i++) + (*buf)[i] = 0x10 + i; +} + +typedef unsigned int operand_encoding_t; +#define OP_ENC_REG 0x80000000UL +#define OP_ENC_MEM 0x40000000UL + +struct InsnDef { + unsigned insn_enc; + size_t count; + operand_encoding_t enc[3]; + uint8_t pfx_rex; + uint8_t pfx_seg; + uint8_t pfx_opr; + uint8_t pfx_adr; + uint8_t pfx_lock; + uint8_t pfx_rep; + uint8_t pfx_repe; + uint8_t pfx_repne; + uint8_t pfx_insn; + + InsnDef(const struct ud & u); + friend bool operator == (const InsnDef & left, const InsnDef & right) + { + return 0 == memcmp(&left, &right, sizeof(InsnDef)); + } + friend bool operator < (const InsnDef & left, const InsnDef & right) + { + return 0 > memcmp(&left, &right, sizeof(InsnDef)); + } + friend bool operator > (const InsnDef & left, const InsnDef & right) + { + return 0 < memcmp(&left, &right, sizeof(InsnDef)); + } +}; + +struct InsnDefCompare { + bool operator() (const InsnDef & left, const InsnDef & right) + { + return left < right; + } +}; +typedef std::set insn_set_t; + +InsnDef::InsnDef(const struct ud & u) + : count(0) +{ + int i; + const struct ud_operand *op; + + pfx_adr = u.pfx_adr; + pfx_insn = u.pfx_insn; + pfx_lock = u.pfx_lock; + pfx_opr = u.pfx_opr; + pfx_rep = u.pfx_rep; + pfx_repe = u.pfx_repe; + pfx_repne = u.pfx_repne; + pfx_rex = u.pfx_rex; + pfx_seg = u.pfx_seg; + insn_enc = u.mnemonic; + memset(enc, 0, sizeof(enc)); + /* + * Encode registers and operand types. Do not encode offsets and + * immediate values. + */ + for (i = 0; i < 3; i++) { + op = &u.operand[i]; + switch (op->type) { + case UD_OP_REG: + enc[i] |= OP_ENC_REG; + enc[i] |= op->base; + break; + case UD_OP_MEM: + /* Encode only registers and scales. */ + enc[i] |= OP_ENC_MEM; + enc[i] |= op->base | (op->index << 8) | (op->scale << 16); + break; + default: + /* Encode operand type other than OP_ENC_MEM or OP_ENC_REG. */ + enc[i] |= op->type; + break; + } + if (op->type != UD_NONE) + count++; + } + +} + +static std::string ReplaceAll(const std::string & str, + const std::string & prev_val, + const std::string & new_val) +{ + size_t pos; + std::string s = str; + + while (true) { + pos = s.find(prev_val); + if (std::string::npos == pos) + break; + s = s.replace(pos, prev_val.size(), new_val); + } + return s; +} + +static const struct { + const char *from; + const char *to; +} repl[] = { + {"retn", "ret"}, + {"retnw", "ret"}, + {"iretw", "iret"}, + {"pushfw", "pushf"}, + {"popfw", "popf"}, + {"enterw", "enter"}, + {"cmovae", "cmovnb"}, + {"cmova", "cmovnbe"}, + {"cmovge", "cmovnl"}, + {"cmovg", "cmovnle"}, + {"setae", "setnb"}, + {"seta", "setnbe"}, + {"setge", "setnl"}, + {"setg", "setnle"}, + {"leavew", "leave"}, + {"int1", "int 01"}, + {"int3", "int 03"} +}; + +/* trim from start */ +static inline std::string <rim(std::string &s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + return s; +} + +/* trim from end */ +static inline std::string &rtrim(std::string &s) { + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + return s; +} + +/* trim from both ends */ +static inline std::string &trim(std::string &s) { + return ltrim(rtrim(s)); +} + +static const char *pfx[] = { + "cs", "es", "fs", "gs", "ss", "ds" +}; + +static std::string FixDisassembly(const std::string & disasm) +{ + size_t i, j; + std::string s,s2; + + s = ReplaceAll(disasm, "0x", ""); + s = ReplaceAll(s, "o16 ", ""); + s = ReplaceAll(s, "a16 ", ""); + s = ReplaceAll(s, "a32 ", ""); + for (i = 0; i < _countof(repl); i++) { + s2 = ReplaceAll(s, repl[i].from, repl[i].to); + if (s2 != s) { + s = s2; + break; + } + } + s = trim(s); + for (i = 0; i < 6; i++) { + if (s.substr(0, 3).compare(std::string(pfx[i]) + " ") == 0) { + s = s.substr(3); + break; + } else { + j = s.find(" " + std::string(pfx[i]) + " "); + if (j != s.npos) { + s = s.substr(0, j) + s.substr(j + 3); + break; + } + } + } + return s; +} + +static bool IsInsnUnique(const struct ud & u, insn_set_t *is) +{ + InsnDef insn_def(u); + + insn_set_t::iterator it = is->lower_bound(insn_def); + if (it != is->end() && *it == insn_def) { + return false; + } + is->insert(insn_def); + return true; +} + +static void WriteOutput(FILE * f, const std::vector & buf, size_t size, + const char *disasm) +{ + size_t i; + assert(size <= buf.size()); + for (i = 0; i < size; i++) + fprintf(f, "%02x", buf[i]); + fprintf(f, " %s\n", FixDisassembly(disasm).c_str()); + fflush(f); +} + +static void GenerateToFile(FILE * f, bool x64) +{ + /* Generate buffer */ + std::vector buf; + unsigned int p0, p1, p2; + insn_set_t is; + struct ud u; + unsigned int insn_len, n; + bool disasm_ok; + + n = 0; + GenerateBuffer(&buf); + /* + for (p0 = 0x10; p0 < 0x110; p0++) { + for (p1 = 0x10; p1 < 0x110; p1++) { + for (p2 = 0x10; p2 < 0x110; p2++) { + */ + for (p0 = 0x10; p0 < 0x110; p0++) { + for (p1 = 0x10; p1 < 0x110; p1++) { + for (p2 = 0x10; p2 < 0x110; p2++) { + buf[0] = (p0 & 0xff); + buf[1] = (p1 & 0xff); + buf[2] = (p2 & 0xff); + ud_init(&u); + ud_set_input_buffer(&u, &buf[0], buf.size()); + ud_set_pc(&u, 0x401000); + ud_set_mode(&u, x64 ? 64 : 32); + ud_set_syntax(&u, UD_SYN_INTEL); + ud_set_vendor(&u, UD_VENDOR_INTEL); + + disasm_ok = false; + if ((insn_len = ud_disassemble(&u)) != 0) { + char *disasm = ud_insn_asm(&u); + if (0 != strncmp(disasm, "invalid", 7)) { + disasm_ok = true; + if (IsInsnUnique(u, &is)) { + WriteOutput(f, buf, insn_len, disasm); + n++; + if (n % 10000 == 0) + std::cout << n << " opcodes processed\n"; + } + } + } + if (!disasm_ok) { + /* Cannot disassemble. */ + WriteOutput(f, buf, 10, "db"); + n++; + if (n % 10000 == 0) + std::cout << n << " opcodes processed\n"; + } + } + } + } +} + +int GenerateInstructions(const std::string & out_filename, bool x64) +{ + FILE *f; + + f = fopen(out_filename.c_str(), "wt"); + if (f == NULL) { + std::cerr << "ERROR Cannot open file " << out_filename << "\n"; + return GEN_INSN_IOERROR; + } + + GenerateToFile(f, x64); + + fclose(f); + return GEN_INSN_OKAY; +} + -- cgit v1.2.3