1
Fork 0
mirror of https://github.com/awgil/ffxiv_reverse.git synced 2025-04-22 23:07:44 +00:00
ffxiv_reverse/idbtoolkit/populate_idb.py
2023-05-08 23:23:04 +01:00

554 lines
21 KiB
Python

import idaapi
import ida_bytes
import ida_enum
import ida_name
import ida_nalt
import ida_funcs
import ida_xref
import ida_typeinf
import ida_struct
import ida_funcs
import yaml
import os
import collections
import datetime
import itertools
# use idaapi.require rather than relying on a plain import to simplify development - we want my modules to be reloaded if they change
idaapi.require('ii')
import ii
def msg(text):
    """Print `text` prefixed with the current local date and time."""
    stamp = datetime.datetime.now()
    print(f'{stamp} {text}')
def has_custom_name(ea):
    """Return True if the name at `ea` is treated as user-provided, False if it looks autogenerated."""
    name = ida_name.get_ea_name(ea)
    # it's an official IDA dummy name (see https://hex-rays.com/blog/igors-tip-of-the-week-34-dummy-names/)
    if not ida_name.is_uname(name):
        return False
    # nullsub_ is not considered a dummy, but it's autogenerated nonetheless;
    # staticinit_ is our custom prefix which is functionally a dummy
    if name.startswith(('nullsub_', 'staticinit_')):
        return False
    # string constant: foo => aFoo (second character of the name is the upper-cased first byte at ea)
    if len(name) >= 2 and name[0] == 'a':
        firstByte = ida_bytes.get_byte(ea)
        if firstByte < 0x80 and chr(firstByte).upper() == name[1]:
            return False
    return True
def set_custom_name(ea, name, sig):
    """Try to name `ea` as `name`, recording any conflict as a comment instead of overwriting.

    An address that already has a custom name is never renamed; a differing requested name is
    logged and stored as an 'alt name' comment. Otherwise the rename is skipped when `name` is
    already used elsewhere, or attempted via ida_name.set_name. When `sig` is truthy, its
    'sig' and 'sigOffset' entries are always recorded as a comment.
    """
    if has_custom_name(ea):
        current = ida_name.get_ea_name(ea)
        if current != name:
            msg(f'Skipping rename for existing name: {hex(ea)} {current} -> {name}')
            ii.add_comment_ea_auto(ea, f'alt name: {name}')
    else:
        owner = ii.find_global_name_ea(name)
        if owner != idaapi.BADADDR:
            msg(f'Skipping rename for {hex(ea)}: same name {name} is already used by {hex(owner)}')
            ii.add_comment_ea_auto(ea, f'duplicate name: {name}')
        elif ida_name.set_name(ea, name) == 0:
            # no need to write a message, we get a messagebox anyway...
            ii.add_comment_ea_auto(ea, f'rename failed: {name}')
    if sig:
        ii.add_comment_ea_auto(ea, f'signature: {sig["sig"]} +{sig["sigOffset"]}')
def ensure_function_at(ea, message):
    """Make sure a function exists at `ea`; return success.

    Returns True straight away when ida_funcs.get_func already finds a function for `ea`;
    otherwise returns the result of ida_funcs.add_func. When `message` is truthy it is used
    as the description of the function in the logged outcome of the creation attempt.
    """
    if ida_funcs.get_func(ea):
        return True
    created = ida_funcs.add_func(ea)
    if message:
        if created:
            msg(f'Created new {message} at {hex(ea)}')
        else:
            msg(f'Failed to create {message} at {hex(ea)}')
    return created
def calc_vtable_length(ea):
    """Return the number of 8-byte slots in the table starting at `ea` (always at least 1).

    The table is assumed to extend until the next named address, or until a slot
    that ii.ea_has_data_offset rejects.
    """
    slots = 1
    while ida_name.get_ea_name(ea + slots * 8) == '' and ii.ea_has_data_offset(ea + slots * 8):
        slots += 1
    return slots
def convert_exported_sig(sig, name = None):
    """Render an exported signature dict as a C declaration string.

    `sig` provides 'retType' and a list of 'arguments' (each with 'type' and 'name');
    `name` is the declarator to use, falling back to 'func' when it is empty or None.
    """
    params = [f'{arg["type"]} {arg["name"]}' for arg in sig['arguments']]
    paramList = ', '.join(params)
    declarator = name or 'func'
    return f'{sig["retType"]} {declarator}({paramList})'
def apply_function_type(ea, tinfo):
    """Apply `tinfo` to the function at `ea` unless it already has a printable type.

    An existing type is kept; the new one is logged and stored as an 'alt sig' function comment.
    Raises Exception when there is no function at `ea` or when applying the type fails.
    """
    func = ida_funcs.get_func(ea)
    if not func:
        raise Exception('Function not defined')
    existing = ida_typeinf.print_type(ea, 0)
    if not existing:
        if not ida_typeinf.apply_tinfo(ea, tinfo, 0):
            raise Exception('Apply failed')
        return
    msg(f'Skipping function type assignment @ {hex(ea)}: {existing} -> {tinfo}')
    ii.add_comment_func(func, f'alt sig: {tinfo}')
def find_base_offset(structId, baseId):
    """Return the offset of base `baseId` inside struct `structId`, or None if not found.

    Walks the leading members named 'baseclass_<offset>' in order, recursing into each one
    that is not itself the requested base. The search stops at the first member that is
    not such a base-class member.
    """
    struc = ida_struct.get_struc(structId)
    if not struc or baseId == idaapi.BADADDR:
        return None
    offset = 0
    member = ida_struct.get_member(struc, offset)
    while member and ida_struct.get_member_name(member.id) == f'baseclass_{offset}':
        info = ida_nalt.opinfo_t()
        ida_struct.retrieve_member_info(info, member)
        if info.tid == baseId:
            return offset
        nested = find_base_offset(info.tid, baseId)
        if nested is not None:
            return offset + nested
        offset += ida_struct.get_member_size(member)
        member = ida_struct.get_member(struc, offset)
    return None
def populate_static_initializers():
    """Define and name the functions listed in the `_initterm` table used by the CRT main routine.

    Each table entry that has no custom name yet is named 'staticinit_<index>'. Any failure
    (missing `_initterm`, unexpected call shape, unexpected table contents) is logged and
    aborts only this step.
    """
    msg('*** Populating static initializers ***')

    def find_main_initterm():
        # returns the address of the single _initterm call made from ?__scrt_common_main*
        eaInitterm = ii.find_global_name_ea('_initterm')
        if eaInitterm == idaapi.BADADDR:
            raise Exception('Failed to find _initterm address')
        # there are several _initterm calls, interesting one is from main, others are purely framework ones
        # a reference that is not inside any function may yield no name at all, so normalise to ''
        # before matching - otherwise one stray xref would abort the whole step
        mainXrefs = [
            xref for xref in ii.enumerate_xrefs_to(eaInitterm)
            if (ida_funcs.get_func_name(xref) or '').startswith('?__scrt_common_main')
        ]
        if len(mainXrefs) != 1:
            raise Exception(f'Found {len(mainXrefs)} calls to _initterm from main, 1 expected')
        return mainXrefs[0]

    def parse_initterm_arguments(callEA):
        # assume arguments are of the form 'lea rcx/rdx, addr'
        args = ii.get_call_argument_assignment_eas(callEA)
        if len(args) != 2:
            raise Exception(f'Unexpected args for _initterm call: 2 expected, got {len(args)}')
        start = ii.get_instruction_operand_immediate(args[0], 1)
        end = ii.get_instruction_operand_immediate(args[1], 1)
        # both start and end have 0's, between them have function pointers
        if start + 8 * calc_vtable_length(start) != end:
            raise Exception('Unexpected _initterm table contents')
        return (start, end)

    try:
        eaInittermCall = find_main_initterm()
        start, end = parse_initterm_arguments(eaInittermCall)
        # skip the first slot (at start); every following slot up to end holds a function pointer
        for ea in range(start + 8, end, 8):
            func = ida_bytes.get_qword(ea)
            ensure_function_at(func, "static initializer")
            if not has_custom_name(func):
                ida_name.set_name(func, f'staticinit_{(ea - start) >> 3}')
    except Exception as e:
        msg(f'Static initializer error: {e}')
def populate_global_names(data):
    """Name every exported global; `data` maps address -> entry with 'name' and 'address' keys."""
    msg('** Populating exported global names **')
    for address, entry in data.items():
        set_custom_name(address, entry['name'], entry['address'])
def populate_function_names(data):
    """Name every exported function; `data` maps address -> entry with 'name' and 'address' keys."""
    msg('** Populating exported function names **')
    for address, entry in data.items():
        # if function is not referenced from anywhere, define one manually
        ensure_function_at(address, "function")
        set_custom_name(address, entry['name'], entry['address'])
def populate_enums(data):
    """Create every exported enum; `data` maps enum name -> description.

    Each description provides 'isBitfield', 'isSigned', 'width' and 'values' (a list of
    entries with 'name' and 'value'). A failure for one enum is logged and does not stop the rest.
    """
    msg('** Populating exported enums **')

    def populate_enum(name, isBitfield, isSigned, width, values):
        if ida_enum.get_enum(name) != idaapi.BADADDR:
            raise Exception(f'{name} already exists in database')
        eid = ii.add_enum(name, isBitfield, isSigned, width)
        if eid == idaapi.BADADDR:
            raise Exception(f'Failed to create {name}')
        for member in values:
            memberName = member['name']
            memberValue = member['value']
            # enum names in ida are global, so qualify them
            qualified = f'{name}.{memberName}'
            # for bitfields the member's own value is passed as its mask, otherwise -1
            mask = memberValue if isBitfield else -1
            status = ida_enum.add_enum_member(eid, qualified, memberValue, mask)
            if status == 0:
                continue
            msg(f'Failed to add enum member {name}.{memberName} = {memberValue}: {status}')
            ii.add_comment_enum(eid, f'could not add field {memberName} = {memberValue}')

    with ii.mass_type_updater(ida_typeinf.UTP_ENUM):
        for enumName, enumData in data.items():
            try:
                populate_enum(enumName, enumData['isBitfield'], enumData['isSigned'], enumData['width'], enumData['values'])
            except Exception as err:
                msg(f'Enum error: {err}')
def populate_vtables(data):
    """Name vtables, create '<class>_vtbl' structures and name virtual functions.

    `data` maps class name -> description with 'primaryVTable', 'secondaryVTables' and 'bases'.
    Returns a dict mapping class name -> Vtable(primaryEA, secondaryEAs, vFuncs, base), in which
    a base always precedes the classes derived from it; vFuncs is always a dict keyed by index.
    """
    # vtable population is done in several passes
    Vtable = collections.namedtuple("VTable", "primaryEA secondaryEAs vFuncs base")
    vtables = {} # base always ordered before derived; key = class name

    # first pass: build an ordered set of classes with vtables (base before derived) and assign names for all known addresses
    # note that we don't immediately calculate vtable size on the off chance that some of the vtables-to-be-renamed has no known xrefs
    def pass1():
        msg('** Populating exported vtables: pass 1 **')

        def populate_vtable(cname):
            if cname in vtables:
                return # class already processed, since it's a base of some earlier-defined class
            cdata = data[cname]
            primary = cdata['primaryVTable']
            secondary = cdata['secondaryVTables']
            bases = cdata['bases']
            # ensure we process all bases first - both direct and indirect (from secondary vtables) - we might not have correct inheritance chain set up
            for b in bases:
                populate_vtable(b['type'])
            for v in secondary:
                populate_vtable(v['base'])
            # if primary base has vtable, this class should have one too
            # TODO: this should be handled during generation
            primaryBase = bases[0]['type'] if len(bases) > 0 else None
            if primaryBase and primaryBase in vtables and not primary:
                msg(f'Class {cname} has no primary vtable, but has base {bases[0]["type"]} with one')
                # vFuncs must be a dict: later passes call .keys()/.items() on it and index it by vfunc number
                primary = { 'ea': 0, 'address': None, 'vFuncs': {} }
            if not primary:
                return # skip, this class has no vtables
            primaryEA = primary['ea']
            if primaryEA != 0:
                set_custom_name(primaryEA, f'vtbl_{cname}', primary['address'])
            for v in secondary:
                secEA = v['ea']
                secBase = v['base']
                set_custom_name(secEA, f'vtbl_{cname}___{secBase}', None)
                if secBase in vtables:
                    vtables[secBase].secondaryEAs.append(secEA)
                else:
                    msg(f'Indirect base {secBase} has no known vtables')
            vtables[cname] = Vtable(primaryEA, [], primary['vFuncs'], primaryBase)

        for name in data.keys():
            populate_vtable(name)

    # second pass: determine vtable sizes and create structures
    def pass2():
        msg('** Populating exported vtables: pass 2 **')

        def common_vtable_length(primaryEA, secondaryEAs):
            # shortest length among all known instances (0 if there are none); mismatches with primary are reported
            primaryLen = calc_vtable_length(primaryEA) if primaryEA != 0 else 0
            vlen = primaryLen
            for ea in secondaryEAs:
                secLen = calc_vtable_length(ea)
                if primaryLen != 0 and primaryLen != secLen:
                    msg(f'Mismatch between vtable sizes at {hex(primaryEA)} ({primaryLen}) and {hex(ea)} ({secLen})')
                if vlen == 0 or vlen > secLen:
                    vlen = secLen
            return vlen

        def calc_vf_name(vtable, vfuncs, idx):
            # exported name if there is one and it is not taken yet, 'vf<idx>' otherwise
            if idx in vfuncs:
                custom = vfuncs[idx]['name']
                if not ida_struct.get_member_by_name(vtable, custom):
                    return custom
                msg(f'Duplicate vtable field {ida_struct.get_struc_name(vtable.id)}.{custom}, using fallback for {idx}')
            return f'vf{idx}'

        def create_vtable(cname, vtbl):
            vlen = common_vtable_length(vtbl.primaryEA, vtbl.secondaryEAs)
            if vlen == 0:
                return # don't bother creating a vtable if there are no instances
            # create structure
            vtable = ii.add_struct(f'{cname}_vtbl')
            if not vtable:
                raise Exception(f'Failed to create vtable structure for {cname}')
            # add base, if any
            if vtbl.base and not ii.add_struct_baseclass(vtable, f'{vtbl.base}_vtbl'):
                msg(f'Failed to add base for vtable for {cname}')
            # check that all custom vfuncs are in range
            firstNewVF = ida_struct.get_struc_size(vtable) >> 3
            for idx in vtbl.vFuncs.keys():
                if idx < firstNewVF:
                    msg(f'Class {cname} overrides vfunc {idx} inherited from base {vtbl.base}')
                elif idx >= vlen:
                    msg(f'Class {cname} defines vfunc {idx} which is outside bounds ({vlen})')
            # add fields
            for idx in range(firstNewVF, vlen):
                name = calc_vf_name(vtable, vtbl.vFuncs, idx)
                if not ii.add_struct_member_ptr(vtable, idx << 3, name):
                    msg(f'Failed to add vfunc {idx} to vtable {cname}')

        with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
            for cname, vtbl in vtables.items():
                try:
                    create_vtable(cname, vtbl)
                except Exception as e:
                    msg(f'Create vtable error: {e}')

    # third pass: rename functions, add crossrefs for vtable instances
    # the only reason to split it into separate pass is to do it outside mass type update
    def pass3():
        msg('** Populating exported vtables: pass 3 **')

        def create_vtable_instance(cname, vtable, numVFs, ea, prefix, signatures):
            # note: i feel that creating vtable global is, while correct, makes viewing it slightly worse (not seeing vf offsets etc)
            # but at very least create custom xref (so that find-refs on vtable struct works)
            # TODO: reconsider...
            ida_xref.add_dref(ea, vtable.id, ida_xref.XREF_USER | ida_xref.dr_I)
            for idx in range(0, numVFs):
                vfuncEA = ida_bytes.get_qword(ea + idx * 8)
                vfuncName = ida_name.get_ea_name(vfuncEA)
                if vfuncName == "_purecall":
                    continue # abstract virtual function
                inner = ida_struct.get_innermost_member(vtable, idx * 8)
                if not inner:
                    msg(f'Failed to find field for vfunc {idx} of {cname}')
                    continue
                leafName = ida_struct.get_member_name(inner[0].id)
                if f'.{leafName}' in vfuncName:
                    continue # this function is probably not overridden
                set_custom_name(vfuncEA, f'{prefix}.{leafName}', signatures[idx]['address'] if signatures and idx in signatures else None)

        for cname, vtbl in vtables.items():
            vtable = ii.get_struct_by_name(f'{cname}_vtbl')
            if not vtable:
                continue # pass 2 created no structure for this class (no instances, or creation failed)
            numVFs = ida_struct.get_struc_size(vtable) >> 3
            if vtbl.primaryEA != 0:
                create_vtable_instance(cname, vtable, numVFs, vtbl.primaryEA, cname, vtbl.vFuncs)
            for ea in vtbl.secondaryEAs:
                # remove vtbl_ prefix, leaving derived___cname
                create_vtable_instance(cname, vtable, numVFs, ea, ida_name.get_ea_name(ea)[5:], None)

    pass1()
    pass2()
    pass3()
    return vtables
def populate_structs(data):
    """Create every exported structure and fill in its bases, vtable pointer and fields.

    `data` maps class name -> description with 'bases', 'fields', 'primaryVTable' and 'size'.
    Problems with individual structures, bases or fields are logged and do not stop the rest.
    """
    # structure creation is done in two passes
    res = {} # base/substruct always ordered before referencing struct; key = class name

    # first pass: build an ordered set of structures (base/subfield before containing structures) and create empty structs
    # these empty structs can be used as a kind of 'forward declarations' for pointers
    def pass1():
        msg('** Populating exported structs: pass 1 **')

        def populate_struct(cname):
            if cname in res:
                return # class already processed, since it's a base/substruct of some earlier-defined class
            cdata = data[cname]
            # ensure we process all bases and struct fields first
            for b in cdata['bases']:
                populate_struct(b['type'])
            for f in cdata['fields']:
                if f['isStruct']:
                    populate_struct(f['type'])
            res[cname] = cdata # tbd
            # add struct
            s = ii.add_struct(cname)
            if not s:
                raise Exception(f'Failed to create structure {cname}')

        with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
            for name in data.keys():
                try:
                    populate_struct(name)
                except Exception as e:
                    msg(f'Struct create error: {e}')

    # second pass: fill structure bases and fields
    def pass2():
        msg('** Populating exported structs: pass 2 **')

        def add_base(s, baseType, offset, size):
            curSize = ida_struct.get_struc_size(s)
            if curSize != offset:
                # treat this as a warning...
                msg(f'Unexpected offset for {ida_struct.get_struc_name(s.id)} base {baseType}: expected {hex(offset)}, got {hex(curSize)}')
            if not ii.add_struct_baseclass(s, baseType):
                msg(f'Failed to add {ida_struct.get_struc_name(s.id)} base {baseType}')
                return
            # the base was appended at the previous end of the structure
            actualSize = ida_struct.get_member_size(ida_struct.get_member(s, curSize))
            if actualSize != size:
                msg(f'Unexpected size for {ida_struct.get_struc_name(s.id)} base {baseType}: expected {hex(size)}, got {hex(actualSize)}')

        def add_vptr(s, vtname):
            # typed as '<vtname>*' if that structure exists, plain 'void*' otherwise
            if not ii.add_struct_member_ptr(s, 0, "__vftable"):
                msg(f'Failed to add vtable pointer to {ida_struct.get_struc_name(s.id)}')
            elif not ii.set_struct_member_by_offset_type(s, 0, (vtname + '*') if ida_struct.get_struc_id(vtname) != idaapi.BADADDR else 'void*'):
                msg(f'Failed to set vtable pointer type for {ida_struct.get_struc_name(s.id)} (vtbl-struct-id={hex(ida_struct.get_struc_id(vtname))})')

        def add_field(s, offset, fdata, checkSize):
            name = fdata['name']
            fieldType = fdata['type']
            arrLen = fdata['arrayLength']
            if fdata['isStruct']:
                success = ii.add_struct_member_substruct(s, offset, name, fieldType, arrLen if arrLen > 0 else 1)
            else:
                typeSuffix = f'[{arrLen}]' if arrLen > 0 else ''
                success = ii.add_struct_member_typed(s, offset, name, fieldType + typeSuffix)
            if not success:
                msg(f'Failed to add field {ida_struct.get_struc_name(s.id)}.{name}')
            elif checkSize:
                actualSize = ida_struct.get_member_size(ida_struct.get_member(s, offset))
                expectedSize = fdata['size']
                if actualSize != expectedSize:
                    msg(f'Unexpected size for {ida_struct.get_struc_name(s.id)}.{name}: expected {hex(expectedSize)}, got {hex(actualSize)}')

        def add_union(s, cname, offset, flist):
            # several fields share one offset: wrap them into a helper union and embed that
            uname = f'union{hex(offset)[2:]}'
            utype = f'{cname}_{uname}'
            su = ii.add_struct(utype, True)
            if not su:
                # without the union there is nothing to attach the alternatives to
                msg(f'Failed to create union {utype}')
                return
            for f in flist:
                add_field(su, 0, f, False)
            if not ii.add_struct_member_substruct(s, offset, uname, utype):
                msg(f'Failed to add field {ida_struct.get_struc_name(s.id)}.{uname}')

        with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
            for cname, cdata in res.items():
                s = ii.get_struct_by_name(cname)
                if not s:
                    continue
                # start with bases (if any)
                for b in cdata['bases']:
                    add_base(s, b['type'], b['offset'], b['size'])
                # now add primary vtable, if needed
                if ida_struct.get_struc_size(s) == 0 and cdata['primaryVTable']:
                    add_vptr(s, cname + '_vtbl')
                # now add fields
                # NOTE(review): groupby only merges adjacent entries, so this presumably relies on fields being ordered by offset - confirm
                for offset, fgroup in itertools.groupby(cdata['fields'], lambda f: f['offset']):
                    if offset < ida_struct.get_struc_size(s):
                        msg(f'Unexpected offset for {cname}+{hex(offset)}, current size if {ida_struct.get_struc_size(s)}')
                        continue
                    flist = list(fgroup) # group can only be iterated over once
                    if len(flist) == 1:
                        add_field(s, offset, flist[0], True)
                    else:
                        add_union(s, cname, offset, flist)
                # add tail, if structure is larger than last field
                finalSize = ida_struct.get_struc_size(s)
                expectedSize = cdata['size']
                if finalSize > expectedSize:
                    msg(f'Structure {cname} is too large: {hex(finalSize)} > {hex(expectedSize)}')
                elif finalSize < expectedSize and not ii.add_struct_member_byte(s, finalSize, f'tail_{hex(finalSize)[2:]}', expectedSize - finalSize):
                    msg(f'Failed to extend structure {cname}')

    pass1()
    pass2()
def populate_global_types(data):
    """Apply exported types to globals; `data` maps address -> entry with 'type', 'size' and 'name'.

    A global whose address falls inside the previously typed global is skipped.
    """
    msg('** Populating exported global types **')

    def process_global(ea, typeName, expectedSize):
        # returns the number of bytes now covered by the applied type, 0 if nothing was applied
        if not typeName:
            return 0 # nothing to do, type unknown
        tif = ii.parse_cdecl(typeName)
        if not tif:
            raise Exception(f'Failed to parse type {typeName}')
        actualSize = tif.get_size()
        if actualSize != expectedSize:
            msg(f'Mismatched global size {typeName} @ {hex(ea)}: expected {hex(expectedSize)}, got {hex(actualSize)}')
        endEA = ea + actualSize
        # report every existing name that ends up strictly inside the typed range
        for nameEA, name in ii.enumerate_names():
            if ea < nameEA < endEA:
                msg(f'Existing global {name} is now a part of global {typeName} @ {hex(ea)} at offset {hex(nameEA - ea)}')
        if ida_typeinf.apply_tinfo(ea, tif, 0):
            return actualSize
        msg(f'Failed to apply {typeName} @ {hex(ea)}')
        return 0

    nextFreeEA = 0
    for ea, entry in data.items():
        if ea < nextFreeEA:
            msg(f'Skipping global {entry["type"]} {entry["name"]} @ {hex(ea)}, since it is a part of another global')
            continue # this global was already consumed by another global
        consumed = 0
        try:
            consumed = process_global(ea, entry['type'], entry['size'])
        except Exception as err:
            msg(f'Global type error: {err}')
        nextFreeEA = ea + consumed
def populate_function_types(data):
    """Apply exported signatures to functions; `data` maps address -> entry with a 'signature' key.

    Entries without a signature are skipped. Parse and apply failures are logged per function.
    """
    msg('** Populating exported function types **')
    for ea, f in data.items():
        sig = f['signature']
        if not sig:
            continue
        decl = convert_exported_sig(sig)
        tif = ii.parse_cdecl(decl)
        if not tif:
            # a failed parse yields a falsy result; do not hand that to apply_function_type
            msg(f'Failed to parse function type {decl} @ {hex(ea)}')
            continue
        try:
            apply_function_type(ea, tif)
        except Exception as e:
            msg(f'Failed to apply function type {tif} @ {hex(ea)}: {e}')
def populate_vfunc_types(vtables):
    """Type the vtable structure fields and propagate those types to the virtual functions.

    `vtables` maps class name -> record with primaryEA, secondaryEAs and vFuncs attributes.
    Classes without a '<class>_vtbl' structure are skipped.
    """
    msg('** Populating exported virtual function types **')

    def update_vtable_fields(cname, vtable, vtbl):
        # give every vtable slot that has an exported signature a function pointer type
        for idx, vfunc in vtbl.vFuncs.items():
            sig = vfunc['signature']
            if not sig:
                continue
            member = ida_struct.get_member(vtable, idx * 8)
            if not member or ida_struct.get_member_name(member.id) == 'baseclass_0':
                continue
            decl = convert_exported_sig(sig, '(*)')
            if not ii.set_struct_member_type(vtable, member, decl):
                msg(f'Failed to set vtable {cname} entry #{idx} type to {decl}')

    def propagate_vfunc_type(eaRef, tinfo, cname, shift):
        # shift == 0: 'this' becomes cname*; shift > 0: 'this' becomes a __shifted pointer; shift < 0: 'this' is kept
        vfuncEA = ida_bytes.get_qword(eaRef)
        if ida_name.get_ea_name(vfuncEA) == "_purecall":
            return # abstract virtual function
        # replace 'this' pointer type with proper one
        try:
            fi = ida_typeinf.func_type_data_t()
            if not tinfo.get_func_details(fi):
                raise Exception('Failed to get func details')
            if fi.size() == 0:
                raise Exception('Func has 0 args')
            if fi[0].name != 'this':
                raise Exception(f'First arg is not this: {fi[0].name}')
            if not fi[0].type.is_ptr():
                raise Exception(f'First arg has unexpected type {fi[0].type}')
            if shift == 0:
                fi[0].type = ii.parse_cdecl(cname + '*')
            elif shift > 0:
                fi[0].type = ii.parse_cdecl(f'{fi[0].type} __shifted({cname}, {shift})')
            # else: failed to find base, keep base*
            tifAdj = ida_typeinf.tinfo_t()
            if not tifAdj.create_func(fi):
                raise Exception(f'Failed to build updated tinfo: {tifAdj}')
            apply_function_type(vfuncEA, tifAdj)
        except Exception as err:
            msg(f'Failed to apply virtual function type {tinfo} @ {hex(vfuncEA)}: {err}')

    def propagate_types_to_instances(cname, vtable, vtbl):
        # for every function-pointer-typed slot, apply the pointee type to the functions in all vtable instances
        slotCount = ida_struct.get_struc_size(vtable) >> 3
        for idx in range(slotCount):
            slotOffset = idx * 8
            inner = ida_struct.get_innermost_member(vtable, slotOffset)
            itype = ii.get_struct_member_tinfo(inner[0]) if inner else None
            if not itype or not itype.is_funcptr():
                continue
            itype = ida_typeinf.remove_pointer(itype)
            if vtbl.primaryEA != 0:
                propagate_vfunc_type(vtbl.primaryEA + slotOffset, itype, cname, 0)
            for ea in vtbl.secondaryEAs:
                # remove vtbl_ prefix and ___cname suffix, leaving derived
                derivedName = ida_name.get_ea_name(ea)[5:-3-len(cname)]
                shift = find_base_offset(ida_struct.get_struc_id(derivedName), ida_struct.get_struc_id(cname))
                propagate_vfunc_type(ea + slotOffset, itype, derivedName, -1 if shift is None else shift)

    for cname, vtbl in vtables.items():
        vtable = ii.get_struct_by_name(f'{cname}_vtbl')
        if not vtable:
            continue
        update_vtable_fields(cname, vtable, vtbl)
        propagate_types_to_instances(cname, vtable, vtbl)
def populate_exported(yamlName):
    """Load `yamlName` from the directory containing this script and run every population step on it.

    The file is read with yaml.safe_load and must provide the 'globals', 'functions',
    'enums' and 'structs' sections.
    """
    msg(f'*** Populating exported items from {yamlName} ***')
    scriptDir = os.path.dirname(os.path.realpath(__file__))
    yamlPath = os.path.join(scriptDir, yamlName)
    with open(yamlPath, 'r') as fd:
        data = yaml.safe_load(fd)
    # names first, ...
    populate_global_names(data['globals'])
    populate_function_names(data['functions'])
    # ... then type definitions, ...
    populate_enums(data['enums'])
    vtables = populate_vtables(data['structs'])
    populate_structs(data['structs'])
    # ... and finally type application
    populate_global_types(data['globals'])
    populate_function_types(data['functions'])
    populate_vfunc_types(vtables)
# script entry: everything below runs as soon as the file is executed
# NOTE(review): breakpoint() stops in the Python debugger before anything is populated - looks like a development aid, confirm it is intended
breakpoint()
# name static initializers first, then import everything described by info.yml
populate_static_initializers()
populate_exported('info.yml')
msg('*** Finished! ***')