"""IDAPython script: populate the current database from an exported description file.

Reads 'info.json' (located next to this script) and applies the exported
names, enums, vtables, structures and types to the database, after first
naming the static initializers referenced by the main `_initterm` call.
"""
import collections
import datetime
import itertools
import json
import os

import yaml

import idaapi
import ida_bytes
import ida_enum
import ida_funcs
import ida_name
import ida_nalt
import ida_struct
import ida_typeinf
import ida_xref

idaapi.require('ii')  # to simplify development, we want to reload my modules if they change
import ii


def msg(text):
    """Print `text` prefixed with the current timestamp."""
    print(f'{datetime.datetime.now()} {text}')


# json, unlike yml, always stores keys as strings - but we use int keys in some cases
def as_int(k):
    """Return `k` converted with int() if it is a string, otherwise `k` unchanged."""
    return int(k) if isinstance(k, str) else k


def has_custom_name(ea):
    """Return True if the name at `ea` looks user-assigned rather than auto-generated."""
    name = ida_name.get_ea_name(ea)
    if not ida_name.is_uname(name):
        # it's an officially IDA dummy name (see https://hex-rays.com/blog/igors-tip-of-the-week-34-dummy-names/)
        return False
    # nullsub_ is not considered a dummy, but it's autogenerated nonetheless
    # staticinit_ is our custom prefix which is functionally a dummy
    if name.startswith(('nullsub_', 'staticinit_')):
        return False
    if len(name) >= 2 and name[0] == 'a':
        byte = ida_bytes.get_byte(ea)
        if byte < 0x80 and name[1] == chr(byte).upper():
            return False  # string constant: foo => aFoo
    return True


# addresses that were already renamed by this script (used as a set; values are placeholders)
renamed_eas = {}


def rename_ea(ea, name):
    """Assign `name` to `ea`, or record it as a comment when renaming is not possible.

    The first successful rename of an address wins; later different names for
    the same address are added as 'alt name' comments. If `name` is already
    used by another address, the rename is skipped and a comment is added.
    """
    if ea in renamed_eas:
        if ida_name.get_ea_name(ea) != name:
            #msg(f'Skipping rename for existing name: {hex(ea)} {ida_name.get_ea_name(ea)} -> {name}')
            ii.add_comment_ea_auto(ea, f'alt name: {name}')
        return

    dupEA = ii.find_global_name_ea(name)
    if dupEA == ea:
        renamed_eas[ea] = {}  # already has same name, probably from previous run
        return
    if dupEA != idaapi.BADADDR:
        msg(f'Skipping rename for {hex(ea)}: same name {name} is already used by {hex(dupEA)}')
        ii.add_comment_ea_auto(ea, f'duplicate name: {name}')
        return

    # preserve whatever custom name was there before we overwrite it
    if has_custom_name(ea):
        ii.add_comment_ea_auto(ea, f'original name: {ida_name.get_ea_name(ea)}')
    if ida_name.set_name(ea, name) == 0:
        # no need to write a message, we get a messagebox anyway...
        ii.add_comment_ea_auto(ea, f'rename failed: {name}')
    else:
        renamed_eas[ea] = {}  # placeholder value


def add_sig_comment(ea, sig):
    """Add a 'signature: <sig> +<offset>' comment at `ea`; no-op if `sig` is falsy."""
    if sig:
        ii.add_comment_ea_auto(ea, f'signature: {sig["sig"]} +{sig["sigOffset"]}')


# ensure specified ea is a function start; returns success (either function already existed or was created)
def ensure_function_at(ea, message):
    """Make sure a function exists at `ea`, creating one if needed.

    `message` is a short description used for logging the creation attempt;
    pass a falsy value to suppress logging.
    """
    if ida_funcs.get_func(ea):
        return True
    added = ida_funcs.add_func(ea)
    if message:
        msg(f'Created new {message} at {hex(ea)}' if added else f'Failed to create {message} at {hex(ea)}')
    return added


def calc_vtable_length(ea):
    """Return the number of 8-byte entries of the table starting at `ea` (always >= 1)."""
    # assume that length is from start until next name
    end = ea + 8
    while ida_name.get_ea_name(end) == '' and ii.ea_has_data_offset(end):
        end += 8
    return (end - ea) >> 3


# function addresses whose type was already applied by this script (used as a set)
applied_function_types = {}


def apply_function_type(ea, tinfo):
    """Apply `tinfo` to the function at `ea`.

    If a type was already applied to `ea` by this script, `tinfo` is only
    recorded as an 'alt sig' comment. A pre-existing type is preserved in an
    'original sig' comment before being overwritten.

    Raises Exception if there is no function at `ea` or applying the type fails.
    """
    func = ida_funcs.get_func(ea)
    if not func:
        raise Exception(f'Function not defined')

    if ea in applied_function_types:
        ii.add_comment_func(func, f'alt sig: {tinfo}')
        return

    existing = ida_typeinf.print_type(ea, 0)
    if existing:
        #msg(f'Skipping function type assignment @ {hex(ea)}: {existing} -> {tinfo}')
        ii.add_comment_func(func, f'original sig: {existing}')

    if not ida_typeinf.apply_tinfo(ea, tinfo, 0):
        raise Exception(f'Apply failed')
    applied_function_types[ea] = {}


def populate_static_initializers():
    """Create and name (staticinit_N) the functions listed in main's `_initterm` table.

    Any failure is logged and swallowed, so the rest of the script still runs.
    """
    msg('*** Populating static initializers ***')

    def find_main_initterm():
        eaInitterm = ii.find_global_name_ea('_initterm')
        if eaInitterm == idaapi.BADADDR:
            raise Exception('Failed to find _initterm address')
        # there are several _initterm calls, interesting one is from main, others are purely framework ones
        # (`or ''` guards against xrefs for which no function name is available)
        mainXrefs = [
            xref for xref in ii.enumerate_xrefs_to(eaInitterm)
            if (ida_funcs.get_func_name(xref) or '').startswith('?__scrt_common_main')
        ]
        if len(mainXrefs) != 1:
            raise Exception(f'Found {len(mainXrefs)} calls to _initterm from main, 1 expected')
        return mainXrefs[0]

    def parse_initterm_arguments(callEA):
        # assume arguments are of the form 'lea rcx/rdx, addr'
        args = ii.get_call_argument_assignment_eas(callEA)
        if len(args) != 2:
            raise Exception(f'Unexpected args for _initterm call: 2 expected, got {len(args)}')
        start = ii.get_instruction_operand_immediate(args[0], 1)
        end = ii.get_instruction_operand_immediate(args[1], 1)
        # both start and end have 0's, between them have function pointers
        if start + 8 * calc_vtable_length(start) != end:
            raise Exception('Unexpected _initterm table contents')
        return (start, end)

    try:
        eaInittermCall = find_main_initterm()
        start, end = parse_initterm_arguments(eaInittermCall)
        for ea in range(start + 8, end, 8):
            func = ida_bytes.get_qword(ea)
            ensure_function_at(func, "static initializer")
            if not has_custom_name(func):
                ida_name.set_name(func, f'staticinit_{(ea - start) >> 3}')
    except Exception as ex:
        msg(f'Static initializer error: {ex}')


def populate_global_names(data):
    """Rename every address in data['globals'] and attach its signature comment."""
    msg('** Populating exported global names **')
    for key, g in data['globals'].items():
        ea = as_int(key)
        for n in g['names']:
            rename_ea(ea, n)
        add_sig_comment(ea, g['address'])


def populate_function_names(data):
    """Ensure a function exists for every entry of data['functions'], then rename it."""
    msg('** Populating exported function names **')
    for key, f in data['functions'].items():
        ea = as_int(key)
        ensure_function_at(ea, "function")  # if function is not referenced from anywhere, define one manually
        for n in f['names']:
            rename_ea(ea, n)
        add_sig_comment(ea, f['address'])


def populate_vtable_names(data):
    """Name primary (vtbl_X) and secondary (vtbl_Derived___X) vtables of data['structs']."""
    msg('** Populating exported vtable names **')
    for name, s in data['structs'].items():
        vtable = s['vTable']
        if not vtable:
            continue  # not interested in classes without vtables
        primaryEA = vtable['ea']
        if primaryEA != 0:
            rename_ea(primaryEA, f'vtbl_{name}')
            add_sig_comment(primaryEA, vtable['address'])
        for v in vtable['secondary']:
            rename_ea(v['ea'], f'vtbl_{v["derived"]}___{name}')


# TODO: calculate masks for bitfield values properly
def populate_enums(data):
    """Create an enum (with qualified member names) for every entry of data['enums']."""
    msg('** Populating exported enums **')

    def populate_enum(name, isBitfield, isSigned, width, values):
        if ida_enum.get_enum(name) != idaapi.BADADDR:
            raise Exception(f'{name} already exists in database')
        eid = ii.add_enum(name, isBitfield, isSigned, width)
        if eid == idaapi.BADADDR:
            raise Exception(f'Failed to create {name}')

        for val in values:
            en = val['name']
            ev = val['value']
            qn = f'{name}.{en}'  # enum names in ida are global, so qualify them
            res = ida_enum.add_enum_member(eid, qn, ev, ev if isBitfield else -1)
            if res != 0:
                msg(f'Failed to add enum member {name}.{en} = {ev}: {res}')
                ii.add_comment_enum(eid, f'could not add field {en} = {ev}')

    with ii.mass_type_updater(ida_typeinf.UTP_ENUM):
        # note: loop variable and exception variable deliberately have distinct names
        for name, edata in data['enums'].items():
            try:
                populate_enum(name, edata['isBitfield'], edata['isSigned'], edata['width'], edata['values'])
            except Exception as ex:
                msg(f'Enum error: {ex}')


def populate_vtables(data):
    """Create a `<class>_vtbl` structure for every class of data['structs'] that has vtable instances."""
    msg('** Populating exported vtables **')
    # for each vtable, we determine its size, create the structure, add crossrefs from structure to instances, and rename functions

    def common_vtable_length(vtbl):
        # smallest length among the primary instance (if any) and all secondary instances
        primaryEA = vtbl['ea']
        primaryLen = calc_vtable_length(primaryEA) if primaryEA != 0 else 0
        vlen = primaryLen
        for sec in vtbl['secondary']:
            secEA = sec['ea']
            secLen = calc_vtable_length(secEA)
            if primaryLen != 0 and primaryLen != secLen:
                msg(f'Mismatch between vtable sizes at {hex(primaryEA)} ({primaryLen}) and {hex(secEA)} ({secLen})')
            if vlen == 0 or vlen > secLen:
                vlen = secLen
        return vlen

    def calc_vf_names(vtable, vfuncs, idx):
        # returns list of names for the field; first one is the field name, the rest are alternatives
        names = vfuncs[idx]['names'] if idx in vfuncs else []
        if len(names) > 0:
            if not ida_struct.get_member_by_name(vtable, names[0]):
                return names  # all good, use custom names
            msg(f'Duplicate vtable field {ida_struct.get_struc_name(vtable.id)}.{names[0]}, using fallback for {idx}')
        # build a new list rather than inserting into the list owned by the loaded data
        return [f'vf{idx}', *names]

    def create_vtable_instance(vtable, numVFs, ea, prefix, signatures):
        # note: i feel that creating vtable global is, while correct, makes viewing it slightly worse (not seeing vf offsets etc)
        # but at very least create custom xref (so that find-refs on vtable struct works)
        # TODO: reconsider...
        ida_xref.add_dref(ea, vtable.id, ida_xref.XREF_USER | ida_xref.dr_I)

        for idx in range(numVFs):
            vfuncEA = ida_bytes.get_qword(ea + idx * 8)
            vfuncName = ida_name.get_ea_name(vfuncEA)
            if vfuncName == "_purecall":
                continue  # abstract virtual function

            inner = ida_struct.get_innermost_member(vtable, idx * 8)
            if not inner:
                msg(f'Failed to find field for vfunc {idx} of {ida_struct.get_struc_name(vtable.id)}')
                continue

            leafName = ida_struct.get_member_name(inner[0].id)
            if f'.{leafName}' in vfuncName:
                continue  # this function is probably not overridden

            rename_ea(vfuncEA, f'{prefix}.{leafName}')
            if signatures and idx in signatures:
                add_sig_comment(vfuncEA, signatures[idx]['address'])

    def populate_vtable(cname, vtbl):
        vlen = common_vtable_length(vtbl)
        if vlen == 0:
            return  # don't bother creating a vtable if there are no instances

        # create structure
        vtable = ii.add_struct(f'{cname}_vtbl')
        if not vtable:
            raise Exception(f'Failed to create vtable structure for {cname}')

        # add base, if any
        primaryBase = vtbl['base']
        if primaryBase and not ii.add_struct_baseclass(vtable, f'{primaryBase}_vtbl'):
            msg(f'Failed to add base for vtable for {cname}')

        # check that all custom vfuncs are in range
        firstNewVF = ida_struct.get_struc_size(vtable) >> 3
        vfuncs = {}  # this always has ints as keys
        for key, vf in vtbl['vFuncs'].items():
            idx = as_int(key)
            if idx < firstNewVF:
                msg(f'Class {cname} overrides vfunc {idx} inherited from base {primaryBase}')
            elif idx >= vlen:
                msg(f'Class {cname} defines vfunc {idx} which is outside bounds ({vlen})')
            else:
                vfuncs[idx] = vf

        # add fields
        for idx in range(firstNewVF, vlen):
            names = calc_vf_names(vtable, vfuncs, idx)
            if not ii.add_struct_member_ptr(vtable, idx << 3, names[0]):
                msg(f'Failed to add vfunc {idx} to vtable {cname}')
                continue
            for n in names[1:]:
                ii.add_comment_member(ida_struct.get_member(vtable, idx << 3), f'alt name: {n}')
        #ida_struct.save_struc(vtable)

        # process vtable instances
        primaryEA = vtbl['ea']
        if primaryEA != 0:
            create_vtable_instance(vtable, vlen, primaryEA, cname, vfuncs)
        for sec in vtbl['secondary']:
            create_vtable_instance(vtable, vlen, sec['ea'], f'{sec["derived"]}___{cname}', None)

    # this assumes that structures are ordered correctly, so vtable of the base is always created before vtable of derived
    with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
        for name, s in data['structs'].items():
            vtable = s['vTable']
            if not vtable:
                continue
            try:
                populate_vtable(name, vtable)
            except Exception as ex:
                msg(f'Create vtable error: {ex}')


def populate_structs(data):
    """Create all structures/unions of data['structs'] with their bases, vtable pointer, fields and tail padding."""
    msg('** Populating exported structs **')

    def add_base(s, typeName, offset, size):
        curSize = ida_struct.get_struc_size(s)
        if curSize != offset:
            # treat this as a warning...
            msg(f'Unexpected offset for {ida_struct.get_struc_name(s.id)} base {typeName}: expected {hex(offset)}, got {hex(curSize)}')
        if not ii.add_struct_baseclass(s, typeName):
            msg(f'Failed to add {ida_struct.get_struc_name(s.id)} base {typeName}')
            return
        if size != 0:
            # the base was appended at the previous end of the structure
            actualSize = ida_struct.get_member_size(ida_struct.get_member(s, curSize))
            if actualSize != size:
                msg(f'Unexpected size for {ida_struct.get_struc_name(s.id)} base {typeName}: expected {hex(size)}, got {hex(actualSize)}')

    def add_vptr(s, vtname):
        if not ii.add_struct_member_ptr(s, 0, "__vftable"):
            msg(f'Failed to add vtable pointer to {ida_struct.get_struc_name(s.id)}')
            return
        # point to the vtable structure if it exists, otherwise fall back to an untyped pointer
        vtblId = ida_struct.get_struc_id(vtname)
        ptrType = vtname + '*' if vtblId != idaapi.BADADDR else 'void*'
        if not ii.set_struct_member_by_offset_type(s, 0, ptrType):
            msg(f'Failed to set vtable pointer type for {ida_struct.get_struc_name(s.id)} (vtbl-struct-id={hex(vtblId)})')

    def add_field(s, offset, fdata):
        names = fdata['names']
        typeName = fdata['type']
        arrLen = fdata['arrayLength']
        if fdata['isStruct']:
            success = ii.add_struct_member_substruct(s, offset, names[0], typeName, arrLen if arrLen > 0 else 1)
        else:
            typeSuffix = f'[{arrLen}]' if arrLen > 0 else ''
            success = ii.add_struct_member_typed(s, offset, names[0], typeName + typeSuffix)
        if not success:
            msg(f'Failed to add field {ida_struct.get_struc_name(s.id)}.{names[0]}')
            return

        member = ida_struct.get_member(s, offset)
        for n in names[1:]:
            ii.add_comment_member(member, f'alt name: {n}')

        if not ida_struct.is_union(s.id):
            actualSize = ida_struct.get_member_size(member)
            expectedSize = fdata['size']
            if actualSize != expectedSize:
                msg(f'Unexpected size for {ida_struct.get_struc_name(s.id)}.{names[0]}: expected {hex(expectedSize)}, got {hex(actualSize)}')

    # structure creation is done in two passes: we first create empty structs, they can then be used as a kind of 'forward declarations' for pointer types
    with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
        for name, s in data['structs'].items():
            if not ii.add_struct(name, s['isUnion']):
                msg(f'Failed to create structure {name}')

    # note: we commit changes before second pass, to ensure that setting types works correctly
    with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
        for cname, cdata in data['structs'].items():
            s = ii.get_struct_by_name(cname)
            if not s:
                continue
            isUnion = cdata['isUnion']

            # start with bases (if any)
            for b in cdata['bases']:
                add_base(s, b['type'], b['offset'], b['size'])

            # now add primary vtable, if needed
            if ida_struct.get_struc_size(s) == 0 and cdata['vTable']:
                add_vptr(s, cname + '_vtbl')

            # now add fields; fields sharing an offset are wrapped into an anonymous-like union substructure
            for offset, fgroup in itertools.groupby(cdata['fields'], lambda f: f['offset']):
                if offset < ida_struct.get_struc_size(s):
                    msg(f'Unexpected offset for {cname}+{hex(offset)}, current size if {ida_struct.get_struc_size(s)}')
                    continue
                flist = list(fgroup)  # group can only be iterated over once
                if isUnion or len(flist) == 1:
                    # NOTE(review): for unions only the first field of the group is added - confirm this is intended
                    add_field(s, offset, flist[0])
                else:
                    uname = f'union{hex(offset)[2:]}'
                    su = ii.add_struct(f'{cname}_{uname}', True)
                    for f in flist:
                        add_field(su, 0, f)
                    ii.add_struct_member_substruct(s, offset, uname, f'{cname}_{uname}')

            # add tail, if structure is larger than last field
            expectedSize = cdata['size']
            if expectedSize != 0:
                finalSize = ida_struct.get_struc_size(s)
                if finalSize > expectedSize:
                    msg(f'Structure {cname} is too large: {hex(finalSize)} > {hex(expectedSize)}')
                elif finalSize < expectedSize:
                    if isUnion:
                        success = ii.add_struct_member_byte(s, 0, 'padding', expectedSize)
                    else:
                        success = ii.add_struct_member_byte(s, finalSize, f'field_{hex(finalSize)[2:]}', expectedSize - finalSize)
                    if not success:
                        msg(f'Failed to extend structure {cname}')


def populate_global_types(data):
    """Apply the exported type to every global of data['globals'] that has one.

    Globals that start inside the extent of a previously typed global are skipped.
    """
    msg('** Populating exported global types **')

    def process_global(ea, typeName, expectedSize):
        # returns size of the applied type, or 0 if there was nothing to apply
        if not typeName:
            return 0  # nothing to do, type unknown

        tif = ii.parse_cdecl(typeName)
        if not tif:
            raise Exception(f'Failed to parse type {typeName}')

        actualSize = tif.get_size()
        if actualSize != expectedSize:
            msg(f'Mismatched global size {typeName} @ {hex(ea)}: expected {hex(expectedSize)}, got {hex(actualSize)}')

        # warn about names that end up strictly inside the new global
        for nameEA, name in ii.enumerate_names():
            if ea < nameEA < ea + actualSize:
                msg(f'Existing global {name} is now a part of global {typeName} @ {hex(ea)} at offset {hex(nameEA - ea)}')

        if not ida_typeinf.apply_tinfo(ea, tif, 0):
            raise Exception(f'Failed to apply {typeName} @ {hex(ea)}')
        return actualSize

    minEA = 0
    for key, g in data['globals'].items():
        ea = as_int(key)
        if ea < minEA:
            msg(f'Skipping global {g["type"]} {g["names"][0]} @ {hex(ea)}, since it is a part of another global')
            continue  # this global was already consumed by another global

        minEA = ea
        try:
            minEA += process_global(ea, g['type'], g['size'])
        except Exception as ex:
            msg(f'Global type error: {ex}')


def populate_function_types(data):
    """Apply the exported signature to every function of data['functions'] that has one."""
    msg('** Populating exported function types **')
    for key, f in data['functions'].items():
        ea = as_int(key)
        sig = f['type']
        if not sig:
            continue

        # '^' in the exported signature marks the place for calling convention and name
        tif = ii.parse_cdecl(sig.replace('^', '__fastcall func'))
        try:
            if not tif:
                # same handling as for globals: never try to apply an unparsed type
                raise Exception(f'Failed to parse type {sig}')
            apply_function_type(ea, tif)
        except Exception as ex:
            msg(f'Failed to apply function type {tif} @ {hex(ea)}: {ex}')


def populate_vfunc_types(data):
    """Set vtable field types from exported vfunc signatures and propagate them to the implementations."""
    msg('** Populating exported virtual function types **')

    def update_vtable_fields(vtable, vfuncs):
        for key, vfunc in vfuncs.items():
            sig = vfunc['type']
            if not sig:
                continue
            idx = as_int(key)
            m = ida_struct.get_member(vtable, idx * 8)
            if not m or ida_struct.get_member_name(m.id) == 'baseclass_0':
                continue  # no own field at this slot (missing, or it belongs to the base vtable)
            ptrType = sig.replace('^', '(__fastcall *)')
            if not ii.set_struct_member_type(vtable, m, ptrType):
                msg(f'Failed to set vtable {ida_struct.get_struc_name(vtable.id)} entry #{idx} type to {ptrType}')

    def propagate_vfunc_type(eaRef, tinfo, cname, shift):
        vfuncEA = ida_bytes.get_qword(eaRef)
        vfuncName = ida_name.get_ea_name(vfuncEA)
        if vfuncName == "_purecall":
            return  # abstract virtual function

        # replace 'this' pointer type with proper one
        try:
            fi = ida_typeinf.func_type_data_t()
            if not tinfo.get_func_details(fi):
                raise Exception('Failed to get func details')
            if fi.size() == 0:
                raise Exception('Func has 0 args')
            if fi[0].name != 'this':
                raise Exception(f'First arg is not this: {fi[0].name}')
            if not fi[0].type.is_ptr():
                raise Exception(f'First arg has unexpected type {fi[0].type}')

            if shift == 0:
                fi[0].type = ii.parse_cdecl(cname + '*')
            elif shift > 0:
                fi[0].type = ii.parse_cdecl(f'{fi[0].type} __shifted({cname}, {shift})')
            # else: failed to find base, keep base*

            tifAdj = ida_typeinf.tinfo_t()
            if not tifAdj.create_func(fi):
                raise Exception(f'Failed to build updated tinfo: {tifAdj}')
            apply_function_type(vfuncEA, tifAdj)
        except Exception as ex:
            msg(f'Failed to apply virtual function type {tinfo} @ {hex(vfuncEA)}: {ex}')

    def propagate_types_to_instances(cname, vdata, vtable):
        numVFs = ida_struct.get_struc_size(vtable) >> 3
        for idx in range(numVFs):
            inner = ida_struct.get_innermost_member(vtable, idx * 8)
            itype = ii.get_struct_member_tinfo(inner[0]) if inner else None
            if not itype or not itype.is_funcptr():
                continue
            itype = ida_typeinf.remove_pointer(itype)

            primaryEA = vdata['ea']
            if primaryEA != 0:
                propagate_vfunc_type(primaryEA + idx * 8, itype, cname, 0)
            for sec in vdata['secondary']:
                propagate_vfunc_type(sec['ea'] + idx * 8, itype, sec['derived'], sec['offset'])

    for cname, cdata in data['structs'].items():
        vdata = cdata['vTable']
        vtable = ii.get_struct_by_name(f'{cname}_vtbl') if vdata else None
        if not vtable:
            continue
        update_vtable_fields(vtable, vdata['vFuncs'])
        propagate_types_to_instances(cname, vdata, vtable)


def populate_exported(fileName, isYaml):
    """Load the export file `fileName` (relative to this script) and apply everything in it.

    `isYaml` selects the yaml parser; otherwise the file is parsed as json.
    """
    msg(f'*** Populating exported items from {fileName} ***')
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), fileName)
    # note: I found that parsing json is orders of magnitude faster than yaml :(
    # explicit encoding, so that parsing does not depend on the platform default codepage
    with open(path, 'r', encoding='utf-8') as fd:
        data = yaml.safe_load(fd) if isYaml else json.load(fd)

    populate_global_names(data)
    populate_function_names(data)
    populate_vtable_names(data)
    populate_enums(data)
    populate_vtables(data)  # vtable population kind-of requires populated global/vtable names (otherwise conceivably vtable can have no refs, which would make us determine size of preceeding vtable incorrectly)
    populate_structs(data)  # structs require existence of vtable types
    populate_global_types(data)  # this and below requires all types to be defined
    populate_function_types(data)
    populate_vfunc_types(data)


populate_static_initializers()
populate_exported('info.json', False)
msg('*** Finished! ***')