# Source path: root/utils/arm-32bit.py
# Blob: 3be04be2a312c2c24cfe7c135746c0214a8cabfd (plain)
from idc import *
import sys
import binascii
from sets import Set
import sys
import re

# Matches a run of one or more whitespace characters (spaces, tabs, newlines).
# ida_disasm_fix() substitutes a single space for every match.
p_spaces = re.compile(r'(\s+)')

# Apply fixes to IDA opcode
def ida_disasm_fix(insn_binary, insn_str):
	"""Return insn_str with each run of whitespace collapsed to one space.

	Parameters:
	  insn_binary - raw instruction bytes; accepted but not used here.
	  insn_str    - disassembly text to clean up.

	Tabs (and any other whitespace matched by p_spaces) end up as a single
	space character in the result.
	"""
	cleaned = p_spaces.sub(r' ', insn_str)
	return cleaned

def get_insn(ea, sz):
	"""Read sz bytes starting at address ea and return them as a string.

	Each byte is fetched with Byte() in ascending address order and converted
	with chr(). A size of zero (or less) yields the empty string.
	"""
	return ''.join([chr(Byte(ea + offset)) for offset in range(sz)])

def insn_write(f, insn_binary, insn_str, header):
	"""Write one instruction record to f and flush it immediately.

	Parameters:
	  f           - object providing write() and flush().
	  insn_binary - raw instruction bytes; must not be empty.
	  insn_str    - disassembly text for the instruction.
	  header      - selects the record format:
	                true  -> {size, "\\xNN\\xNN...", "disasm"},
	                false -> hex dump of the bytes, one space, the disasm.

	Every record ends with a newline.
	"""
	assert len(insn_binary) != 0
	if header:
		# Each byte is emitted as a two-digit \x escape sequence.
		escaped = ''.join(['\\x%02x' % ord(ch) for ch in insn_binary])
		record = '{%d, "' % len(insn_binary) + escaped + '", "' + insn_str + '"},\n'
	else:
		record = '' + binascii.hexlify(insn_binary) + (' %s\n' % insn_str)
	f.write(record)
	f.flush()

# Normalize operand (replace numeric operands with -1)
def normalize_operand(op_type, op_str):
	"""Return "-1" for numeric operand types, otherwise op_str unchanged.

	An operand is replaced when its type is one of o_mem, o_displ, o_imm,
	o_near or o_far.
	"""
	numeric_types = (o_mem, o_displ, o_imm, o_near, o_far)
	if op_type not in numeric_types:
		return op_str
	return "-1"

def is_unique(set, ea):
	"""Report whether the instruction at ea has a shape not seen before.

	The shape key is "mnemonic|op0|op1|op2", built from the first three
	operands after each has been passed through normalize_operand().

	Parameters:
	  set - collection of keys seen so far (needs `in` and add()); a new key
	        is added to it as a side effect.
	  ea  - address of the instruction to inspect.

	Returns True when the key was new (and has now been recorded), False when
	it was already present.
	"""
	operand_slots = (0, 1, 2)
	# Query all operand types first, then all operand texts, then the mnemonic.
	kinds = [GetOpType(ea, slot) for slot in operand_slots]
	texts = [GetOpnd(ea, slot) for slot in operand_slots]
	fields = [GetMnem(ea)]
	for kind, text in zip(kinds, texts):
		fields.append(normalize_operand(kind, text))
	key = "%s|%s|%s|%s" % tuple(fields)
	if key not in set:
		set.add(key)
		return True
	return False

def iteration(f, set, ea, n, data, prev_mnem):
	"""Patch one 32-bit word at ea, disassemble it and record it if it is new.

	Parameters:
	  f         - output file handed to insn_write().
	  set       - cache of already-seen instruction shapes, handed to is_unique().
	  ea        - address that is patched and decoded.
	  n         - number of records written so far.
	  data      - 32-bit value written at ea with PatchDword().
	  prev_mnem - mnemonic returned by the previous call.

	Returns a (mnemonic, n) tuple. When MakeCode() reports size 0, prev_mnem
	and n come back unchanged. Otherwise the mnemonic of the decoded
	instruction is returned, and n is incremented only if a record was written.
	"""
	PatchDword(ea, data)
	insn_size = MakeCode(ea)
	if insn_size == 0:
		return (prev_mnem, n)
	text = GetDisasm(ea)
	mnem = GetMnem(ea)
	# NOTE: purging the cache when the mnemonic changes (set.clear()) was
	# disabled in the original code; this function never clears the cache.
	# Drop the comment part: everything from the first ';' onwards.
	text = text.split(';', 1)[0]
	# Trim spaces at both ends.
	text = text.strip(' ')
	if text == '' or not is_unique(set, ea):
		return (mnem, n)
	raw_bytes = get_insn(ea, insn_size)
	# Only disassemblies with a new shape reach the output file.
	text = ida_disasm_fix(raw_bytes, text)
	insn_write(f, raw_bytes, text, False)
	n += 1
	if n % 10000 == 0:
		print('%d opcodes processed' % n)
	return (mnem, n)

def generate_arm32(filename):
	"""Enumerate ARM 32-bit encodings and write the unique disassemblies to filename.

	The low 28 bits of the instruction word are enumerated exhaustively. The
	highest 4 bits are not: only 0b1110 (AL) and 0b1111 (extended opcode
	encoding) are used, selected by bit 28 of the loop counter.

	Every candidate word is patched in at the entry point address and passed
	to iteration(), which appends a record to the output file for each
	instruction shape it has not seen before.

	Parameters:
	  filename - path of the text file to create; it is opened in 'wt' mode.

	The output file is closed even if an error interrupts the enumeration.
	"""
	# The builtin set type replaces the deprecated sets.Set, and the local is
	# named so that it does not shadow the builtin.
	seen = set()
	ea = GetEntryPoint(GetEntryOrdinal(0))
	# Overwrite the first 20 bytes at the entry point with the values 0..19.
	# NOTE(review): presumably this gives the bytes after the patched word a
	# known content -- confirm.
	for i in range(0, 20):
		PatchByte(ea + i, i)
	f = open(filename, 'wt')
	try:
		n = 0
		mnem = ''
		data = 0
		while data <= 0x1fffffff:
			# Bits 31..29 are forced to 1 and bit 28 comes from the counter, so
			# the top nibble is 0b1110 while bit 28 is clear and 0b1111 once it
			# is set. The low 28 bits are taken from the counter as they are.
			data2 = 0xe0000000 | data
			(mnem, n) = iteration(f, seen, ea, n, data2, mnem)
			data += 1
	finally:
		# Release the file handle on failure as well as on success.
		f.close()
	print('Finished')

def main():
	"""Generate the ARM 32-bit opcode list into ./arm32-opcodes.txt."""
	output_path = "./arm32-opcodes.txt"
	generate_arm32(output_path)

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
	main()