# Provenance: utils/arm-thumb.py, added by commit
# 28008a746a31abb7909dd86cb0cd413ac8943b0b ("first commit", jmpoep,
# Thu, 7 Dec 2023 16:51:07 +0800); patch rendered by cgit v1.2.3.
"""Enumerate Thumb encodings inside IDA and dump unique disassemblies.

The script patches candidate instruction words at the database entry point,
asks IDA to disassemble them, and writes one line per distinct
(mnemonic, normalized operands) combination to a text file.

It relies on the legacy ``idc`` API (``Byte``, ``PatchDword``, ``MakeCode``,
``GetMnem`` ...) and therefore has to be run from within IDA.
"""
from idc import *
import sys
import binascii
import re

# Matches any run of whitespace (spaces, tabs, ...).
p_spaces = re.compile(r'(\s+)')


# Apply fixes to IDA opcode
def ida_disasm_fix(insn_binary, insn_str):
    """Return *insn_str* with every whitespace run collapsed to one space.

    *insn_binary* is accepted for interface symmetry but is not used.
    """
    return p_spaces.sub(r' ', insn_str)


def get_insn(ea, sz):
    """Return the *sz* database bytes starting at address *ea* as a string."""
    return ''.join(chr(Byte(ea + offset)) for offset in range(sz))


def insn_write(f, insn_binary, insn_str, header):
    """Write one instruction record to the open file *f* and flush it.

    With *header* true the record is a C-style initializer line
    ``{size, "\\x..", "disasm"},``; otherwise it is ``<hex bytes> <disasm>``.
    *insn_binary* must be non-empty.
    """
    assert len(insn_binary) != 0
    if header:
        escaped = ''.join('\\x%02x' % ord(byte) for byte in insn_binary)
        line = '{%d, "' % len(insn_binary) + escaped + '", "' + insn_str + '"},\n'
    else:
        line = binascii.hexlify(insn_binary) + (' %s\n' % insn_str)
    f.write(line)
    # Flushed after every record; presumably so partial output survives an
    # interrupted run -- TODO confirm before batching writes.
    f.flush()


# Normalize operand (replace numeric operands with -1)
def normalize_operand(op_type, op_str):
    """Return "-1" for memory/displacement/immediate/near/far operands.

    Any other operand type is returned unchanged as *op_str*.
    """
    if op_type in (o_mem, o_displ, o_imm, o_near, o_far):
        return "-1"
    return op_str


def is_unique(set, ea):
    """Report whether the instruction at *ea* has a not-yet-seen signature.

    The signature is the mnemonic plus the first three operands, each passed
    through normalize_operand().  A new signature is added to *set* and True
    is returned; a known one returns False.
    """
    parts = [GetMnem(ea)]
    for index in range(3):
        parts.append(normalize_operand(GetOpType(ea, index), GetOpnd(ea, index)))
    signature = "|".join("%s" % part for part in parts)
    if signature in set:
        return False
    set.add(signature)
    return True


def iteration(f, set, ea, n, data, prev_mnem):
    """Patch *data* at *ea*, disassemble it and record it if it is new.

    *set* is the cache of signatures seen for the current mnemonic; it is
    cleared whenever the mnemonic differs from *prev_mnem*.  Returns the
    tuple ``(mnemonic to pass to the next call, updated record count)``.
    """
    PatchDword(ea, data)
    sz = MakeCode(ea)
    if sz == 0:
        # IDA could not create an instruction from this encoding.
        return (prev_mnem, n)
    disasm = GetDisasm(ea)
    mnem = GetMnem(ea)
    if prev_mnem != mnem:
        # Opcode changed, purge cache set
        set.clear()
        prev_mnem = mnem
    # Drop a trailing IDA comment, then surrounding spaces.
    comment_pos = disasm.find(';')
    if comment_pos != -1:
        disasm = disasm[0:comment_pos]
    disasm = disasm.strip(' ')
    if disasm == '':
        return (prev_mnem, n)
    if not is_unique(set, ea):
        return (prev_mnem, n)
    insn_binary = get_insn(ea, sz)
    # Add unique disasms to file
    insn_write(f, insn_binary, ida_disasm_fix(insn_binary, disasm), False)
    n += 1
    if n % 1000 == 0:
        print('%d opcodes processed' % n)
    return (prev_mnem, n)


def generate_arm_thumb(filename):
    """Walk the Thumb encoding space and write unique disassemblies.

    Every 16-bit value is tried as a first halfword.  When its top five bits
    are 0x1d, 0x1e or 0x1f the halfword starts a 32-bit encoding, so all
    65536 second halfwords are enumerated as well; otherwise the upper half
    of the patched dword is filled with 0xffff.  Output goes to *filename*.
    """
    seen = set()
    ea = GetEntryPoint(GetEntryOrdinal(0))
    for i in range(0, 20):
        PatchByte(ea + i, i)
    # "with" guarantees the file is closed even if an IDA call raises.
    with open(filename, 'wt') as f:
        mnem = ''
        n = 0
        for data in range(0x10000):
            # Top five bits of the 16-bit first halfword.  The previous shift
            # of (32 - 5) always yielded 0 for 16-bit values, leaving the
            # 32-bit branch unreachable.
            prefix = data >> (16 - 5)
            if prefix in (0x1d, 0x1e, 0x1f):
                for data2 in range(0x10000):
                    (mnem, n) = iteration(f, seen, ea, n, data | (data2 << 16), mnem)
            else:
                # iteration() returns (mnem, n); both must be unpacked or the
                # counter stalls and the mnemonic comparison breaks.
                (mnem, n) = iteration(f, seen, ea, n, data | 0xffff0000, mnem)
    print('Finished')


def main():
    """Generate the opcode list into ./thumb-opcodes.txt."""
    generate_arm_thumb("./thumb-opcodes.txt")


if __name__ == "__main__":
    main()