From 28008a746a31abb7909dd86cb0cd413ac8943b0b Mon Sep 17 00:00:00 2001 From: jmpoep Date: Thu, 7 Dec 2023 16:51:07 +0800 Subject: first commit --- utils/x86disasm/Sconstruct-x86disasm | 93 ++++++++++ utils/x86disasm/disasm.cc | 295 ++++++++++++++++++++++++++++++ utils/x86disasm/disasm.h | 15 ++ utils/x86disasm/main.cc | 24 +++ utils/x86disasm/make-x86disasm.py | 59 ++++++ utils/x86disasm/x86disasm.sln | 26 +++ utils/x86disasm/x86disasm.vcxproj | 169 +++++++++++++++++ utils/x86disasm/x86disasm.vcxproj.filters | 75 ++++++++ 8 files changed, 756 insertions(+) create mode 100644 utils/x86disasm/Sconstruct-x86disasm create mode 100644 utils/x86disasm/disasm.cc create mode 100644 utils/x86disasm/disasm.h create mode 100644 utils/x86disasm/main.cc create mode 100644 utils/x86disasm/make-x86disasm.py create mode 100644 utils/x86disasm/x86disasm.sln create mode 100644 utils/x86disasm/x86disasm.vcxproj create mode 100644 utils/x86disasm/x86disasm.vcxproj.filters (limited to 'utils/x86disasm') diff --git a/utils/x86disasm/Sconstruct-x86disasm b/utils/x86disasm/Sconstruct-x86disasm new file mode 100644 index 0000000..409bcc5 --- /dev/null +++ b/utils/x86disasm/Sconstruct-x86disasm @@ -0,0 +1,93 @@ +import os +import platform +import sys +sys.path.append("../..") +import fileop + +def get_arg(arg_str): + return int((ARGUMENTS.get(arg_str, '0'))) != 0 + +release = get_arg('release') +clean = get_arg('clean') +amd64 = get_arg('amd64') + +system = platform.system().lower() +linux = (system == 'linux') +macosx = (system == 'darwin') +win = (system == 'windows') +assert win or linux or macosx, 'Unsupported platform' + +defs = {} + +if win: + if release: + compiler_flags = '-MD -Ox -Oy ' + defs['NDEBUG'] = None + else: + compiler_flags = '-WX -MTd -Zi -Od ' + defs['_DEBUG'] = None + defs['_DPRINT'] = None + compiler_flags += ' -GS -GF -EHsc ' + defs['WIN'] = None + defs['_CRT_SECURE_NO_DEPRECATE'] = None + defs['_FILE_OFFSET_BITS'] = '64' + defs['WIN32'] = None + defs['_CONSOLE'] = None + linker_flags = '-debug -opt:ref' +elif linux or macosx: + if release: + compiler_flags = '-O3 -fomit-frame-pointer' + defs['NDEBUG'] = None + else: + compiler_flags = '-g' + defs['_DPRINT'] = None + if linux: + defs['LIN'] = None + else: + defs['MACOSX'] = None + linker_flags = ' -pthread ' +else: + assert False, 'Unsupported OS' + +defs['UNITTEST'] = None + +incdirs = ['../../third-party/libudis86'] + +if amd64: + defs['AMD64'] = None + target_arch = 'x86_64' +else: + defs['I386'] = None + target_arch = 'x86' + +project_name = 'x86disasm' + +env = Environment( + ENV = os.environ, + CCFLAGS = compiler_flags, + CPPPATH = incdirs, + CPPDEFINES = defs, + LINKFLAGS = linker_flags, + NAME = project_name, + TARGET_ARCH = target_arch + ) + +if win: + obj_ext = '.obj' +elif linux or macosx: + obj_ext = '.o' +else: + assert False + +dirs = ['.', '../../third-party/libudis86'] + +if clean: + fileop.clean_dirs(dirs, obj_ext) +else: + # Build file list + sources = [] + sources += Glob('./*.cc') + sources += Glob('../../third-party/libudis86/*.c') + # Perform build + env.Program(project_name, sources) + diff --git a/utils/x86disasm/disasm.cc b/utils/x86disasm/disasm.cc new file mode 100644 index 0000000..bd8067b --- /dev/null +++ b/utils/x86disasm/disasm.cc @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "disasm.h" +#include "../../third-party/libudis86/extern.h" + +const size_t kBufSize = 20; + +typedef unsigned char uint8_t; + +static void GenerateBuffer(std::vector * buf) +{ + size_t i; + buf->resize(kBufSize); + for (i = 0; i < kBufSize; i++) + (*buf)[i] = 0x10 + i; +} + +typedef unsigned int operand_encoding_t; +#define OP_ENC_REG 0x80000000UL +#define OP_ENC_MEM 0x40000000UL + +struct InsnDef { + unsigned insn_enc; + size_t count; + operand_encoding_t enc[3]; + uint8_t pfx_rex; + uint8_t pfx_seg; + uint8_t pfx_opr; + uint8_t pfx_adr; + uint8_t pfx_lock; + uint8_t pfx_rep; + uint8_t pfx_repe; + uint8_t pfx_repne; + uint8_t pfx_insn; + + InsnDef(const struct ud & u); + friend bool operator == (const InsnDef & left, const InsnDef & right) + { + return 0 == memcmp(&left, &right, sizeof(InsnDef)); + } + friend bool operator < (const InsnDef & left, const InsnDef & right) + { + return 0 > memcmp(&left, &right, sizeof(InsnDef)); + } + friend bool operator > (const InsnDef & left, const InsnDef & right) + { + return 0 < memcmp(&left, &right, sizeof(InsnDef)); + } +}; + +struct InsnDefCompare { + bool operator() (const InsnDef & left, const InsnDef & right) + { + return left < right; + } +}; +typedef std::set insn_set_t; + +InsnDef::InsnDef(const struct ud & u) + : count(0) +{ + int i; + const struct ud_operand *op; + + pfx_adr = u.pfx_adr; + pfx_insn = u.pfx_insn; + pfx_lock = u.pfx_lock; + pfx_opr = u.pfx_opr; + pfx_rep = u.pfx_rep; + pfx_repe = u.pfx_repe; + pfx_repne = u.pfx_repne; + pfx_rex = u.pfx_rex; + pfx_seg = u.pfx_seg; + insn_enc = u.mnemonic; + memset(enc, 0, sizeof(enc)); + /* + * Encode registers and operand types. Do not encode offsets and + * immediate values. + */ + for (i = 0; i < 3; i++) { + op = &u.operand[i]; + switch (op->type) { + case UD_OP_REG: + enc[i] |= OP_ENC_REG; + enc[i] |= op->base; + break; + case UD_OP_MEM: + /* Encode only registers and scales. */ + enc[i] |= OP_ENC_MEM; + enc[i] |= op->base | (op->index << 8) | (op->scale << 16); + break; + default: + /* Encode operand type other than OP_ENC_MEM or OP_ENC_REG. */ + enc[i] |= op->type; + break; + } + if (op->type != UD_NONE) + count++; + } + +} + +static std::string ReplaceAll(const std::string & str, + const std::string & prev_val, + const std::string & new_val) +{ + size_t pos; + std::string s = str; + + while (true) { + pos = s.find(prev_val); + if (std::string::npos == pos) + break; + s = s.replace(pos, prev_val.size(), new_val); + } + return s; +} + +static const struct { + const char *from; + const char *to; +} repl[] = { + {"retn", "ret"}, + {"retnw", "ret"}, + {"iretw", "iret"}, + {"pushfw", "pushf"}, + {"popfw", "popf"}, + {"enterw", "enter"}, + {"cmovae", "cmovnb"}, + {"cmova", "cmovnbe"}, + {"cmovge", "cmovnl"}, + {"cmovg", "cmovnle"}, + {"setae", "setnb"}, + {"seta", "setnbe"}, + {"setge", "setnl"}, + {"setg", "setnle"}, + {"leavew", "leave"}, + {"int1", "int 01"}, + {"int3", "int 03"} +}; + +/* trim from start */ +static inline std::string <rim(std::string &s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + return s; +} + +/* trim from end */ +static inline std::string &rtrim(std::string &s) { + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + return s; +} + +/* trim from both ends */ +static inline std::string &trim(std::string &s) { + return ltrim(rtrim(s)); +} + +static const char *pfx[] = { + "cs", "es", "fs", "gs", "ss", "ds" +}; + +static std::string FixDisassembly(const std::string & disasm) +{ + size_t i, j; + std::string s,s2; + + s = ReplaceAll(disasm, "0x", ""); + s = ReplaceAll(s, "o16 ", ""); + s = ReplaceAll(s, "a16 ", ""); + s = ReplaceAll(s, "a32 ", ""); + for (i = 0; i < _countof(repl); i++) { + s2 = ReplaceAll(s, repl[i].from, repl[i].to); + if (s2 != s) { + s = s2; + break; + } + } + s = trim(s); + for (i = 0; i < 6; i++) { + if (s.substr(0, 3).compare(std::string(pfx[i]) + " ") == 0) { + s = s.substr(3); + break; + } else { + j = s.find(" " + std::string(pfx[i]) + " "); + if (j != s.npos) { + s = s.substr(0, j) + s.substr(j + 3); + break; + } + } + } + return s; +} + +static bool IsInsnUnique(const struct ud & u, insn_set_t *is) +{ + InsnDef insn_def(u); + + insn_set_t::iterator it = is->lower_bound(insn_def); + if (it != is->end() && *it == insn_def) { + return false; + } + is->insert(insn_def); + return true; +} + +static void WriteOutput(FILE * f, const std::vector & buf, size_t size, + const char *disasm) +{ + size_t i; + assert(size <= buf.size()); + for (i = 0; i < size; i++) + fprintf(f, "%02x", buf[i]); + fprintf(f, " %s\n", FixDisassembly(disasm).c_str()); + fflush(f); +} + +static void GenerateToFile(FILE * f, bool x64) +{ + /* Generate buffer */ + std::vector buf; + unsigned int p0, p1, p2; + insn_set_t is; + struct ud u; + unsigned int insn_len, n; + bool disasm_ok; + + n = 0; + GenerateBuffer(&buf); + /* + for (p0 = 0x10; p0 < 0x110; p0++) { + for (p1 = 0x10; p1 < 0x110; p1++) { + for (p2 = 0x10; p2 < 0x110; p2++) { + */ + for (p0 = 0x10; p0 < 0x110; p0++) { + for (p1 = 0x10; p1 < 0x110; p1++) { + for (p2 = 0x10; p2 < 0x110; p2++) { + buf[0] = (p0 & 0xff); + buf[1] = (p1 & 0xff); + buf[2] = (p2 & 0xff); + ud_init(&u); + ud_set_input_buffer(&u, &buf[0], buf.size()); + ud_set_pc(&u, 0x401000); + ud_set_mode(&u, x64 ? 64 : 32); + ud_set_syntax(&u, UD_SYN_INTEL); + ud_set_vendor(&u, UD_VENDOR_INTEL); + + disasm_ok = false; + if ((insn_len = ud_disassemble(&u)) != 0) { + char *disasm = ud_insn_asm(&u); + if (0 != strncmp(disasm, "invalid", 7)) { + disasm_ok = true; + if (IsInsnUnique(u, &is)) { + WriteOutput(f, buf, insn_len, disasm); + n++; + if (n % 10000 == 0) + std::cout << n << " opcodes processed\n"; + } + } + } + if (!disasm_ok) { + /* Cannot disassemble. */ + WriteOutput(f, buf, 10, "db"); + n++; + if (n % 10000 == 0) + std::cout << n << " opcodes processed\n"; + } + } + } + } +} + +int GenerateInstructions(const std::string & out_filename, bool x64) +{ + FILE *f; + + f = fopen(out_filename.c_str(), "wt"); + if (f == NULL) { + std::cerr << "ERROR Cannot open file " << out_filename << "\n"; + return GEN_INSN_IOERROR; + } + + GenerateToFile(f, x64); + + fclose(f); + return GEN_INSN_OKAY; +} + diff --git a/utils/x86disasm/disasm.h b/utils/x86disasm/disasm.h new file mode 100644 index 0000000..0124dcc --- /dev/null +++ b/utils/x86disasm/disasm.h @@ -0,0 +1,15 @@ + + +#ifndef DISASM_H +#define DISASM_H + +#include + +#define GEN_INSN_OKAY 0 +#define GEN_INSN_IOERROR 1 +#define GEN_INSN_ERROR 2 + +int GenerateInstructions(const std::string & out_filename, bool x64); + +#endif + diff --git a/utils/x86disasm/main.cc b/utils/x86disasm/main.cc new file mode 100644 index 0000000..e0d43af --- /dev/null +++ b/utils/x86disasm/main.cc @@ -0,0 +1,24 @@ +#include +#include "disasm.h" + +int main(int argc, char **argv) +{ + int rc; + bool x64; + std::cout << "x86 Disassembly Generator (C) 2012\n"; + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [x64]\n"; + return 1; + } + if (argc >= 3) { + x64 = (0 == _strnicmp(argv[2], "x64", 3)); + } else { + x64 = false; + } + rc = GenerateInstructions(argv[1], x64); + if (rc == GEN_INSN_OKAY) + std::cout << "Finished\n"; + else + std::cerr << "ERROR Failed with error " << rc << "\n"; + return rc; +} diff --git a/utils/x86disasm/make-x86disasm.py b/utils/x86disasm/make-x86disasm.py new file mode 100644 index 0000000..d7b9fdf --- /dev/null +++ b/utils/x86disasm/make-x86disasm.py @@ -0,0 +1,59 @@ +#!/usr/bin/python +import os +import os.path +import sys +sys.path.append("../..") +import fileop +import utils + +name = "x86disasm" + +def do_clean(): + fileop.clean_dir(".", ".obj") + fileop.clean_dir(".", ".pdb") + fileop.clean_dir(".", ".ilk") + fileop.clean_dir(".", ".exe") + fileop.remove_file(os.path.join(bin_dir, name)) + fileop.remove_file(os.path.join(bin_dir, name + '.exe')) + fileop.remove_file(os.path.join(bin_dir, name + '.pdb')) + fileop.remove_file(os.path.join(bin_dir, name + '.ilk')) + return 0 + +def make(clean, release, x64): + if clean: + do_clean() + result = os.system("scons -f Sconstruct-%s release=%d clean=%d amd64=%d" % (name, release, clean, x64)) + if 0 == result: + # Scons does not put file in the required directory. Do it ourselves. + if utils.get_platform() == 'windows': + os.system("move " + name + ".exe " + bin_dir) + os.system("move " + name + ".pdb " + bin_dir) + os.system("move " + name + ".ilk " + bin_dir) + else: + os.system("mv " + name + " " + bin_dir) + return result + +def title(clean, release, x64): + print "*** %s making ... ***" % name + print "clean = ", clean + print "release = ", release + print "x64 = ", x64 + +def print_result(result): + if 0 == result: + print "*** %s make: OK ***" % name + else: + print "*** %s make: error %d" % (name, result) + +clean = 'clean' in sys.argv +release = 'release' in sys.argv +x64 = 'x64' in sys.argv + +bin_dir = fileop.get_bin_dir('../..', x64, release) +assert os.path.isdir(bin_dir) + +title(clean, release, x64) +result = make(clean, release, x64) +print_result(result) +sys.exit(result) + diff --git a/utils/x86disasm/x86disasm.sln b/utils/x86disasm/x86disasm.sln new file mode 100644 index 0000000..0ba0f4a --- /dev/null +++ b/utils/x86disasm/x86disasm.sln @@ -0,0 +1,26 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "x86disasm", "x86disasm.vcxproj", "{C4688533-8E09-4F65-A04D-39201C9B2D33}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Debug|Win32.ActiveCfg = Debug|Win32 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Debug|Win32.Build.0 = Debug|Win32 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Debug|x64.ActiveCfg = Debug|x64 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Debug|x64.Build.0 = Debug|x64 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Release|Win32.ActiveCfg = Release|Win32 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Release|Win32.Build.0 = Release|Win32 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Release|x64.ActiveCfg = Release|x64 + {C4688533-8E09-4F65-A04D-39201C9B2D33}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/utils/x86disasm/x86disasm.vcxproj b/utils/x86disasm/x86disasm.vcxproj new file mode 100644 index 0000000..fc1cdb6 --- /dev/null +++ b/utils/x86disasm/x86disasm.vcxproj @@ -0,0 +1,169 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {C4688533-8E09-4F65-A04D-39201C9B2D33} + Win32Proj + x86disasm + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)../../bin/i386/$(Configuration) + + + true + $(ProjectDir)../../bin/amd64/$(Configuration) + + + false + $(ProjectDir)../../bin/i386/$(Configuration) + + + false + $(ProjectDir)../../bin/amd64/$(Configuration) + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(ProjectDir)../../third-party/libudis86;%(AdditionalIncludeDirectories) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(ProjectDir)../../third-party/libudis86;%(AdditionalIncludeDirectories) + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(ProjectDir)../../third-party/libudis86 + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(ProjectDir)../../third-party/libudis86 + + + Console + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/utils/x86disasm/x86disasm.vcxproj.filters b/utils/x86disasm/x86disasm.vcxproj.filters new file mode 100644 index 0000000..1f99f34 --- /dev/null +++ b/utils/x86disasm/x86disasm.vcxproj.filters @@ -0,0 +1,75 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {0fa6ebf3-658c-4c86-89c7-425c16cf7ba6} + + + + + Source Files + + + Source Files + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + + + Source Files + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + libudis86 + + + \ No newline at end of file -- cgit v1.2.3