import idaapi
import ida_bytes
import ida_enum
import ida_name
import ida_nalt
import ida_funcs
import ida_xref
import ida_typeinf
import ida_struct
import ida_funcs
import yaml
import os
import collections
import datetime
import itertools

# not import to simplify development - we want to reload my modules if they change
idaapi.require('ii')
import ii


def msg(text):
    """Print `text` prefixed with the current local timestamp."""
    print(f'{datetime.datetime.now()} {text}')


def has_custom_name(ea):
    """Return True if the name at `ea` looks user-assigned rather than autogenerated.

    Treated as autogenerated: IDA dummy names, `nullsub_*`, our own `staticinit_*`,
    and string-constant names of the form `aFoo` for data starting with `foo`.
    """
    name = ida_name.get_ea_name(ea)
    if not ida_name.is_uname(name):
        return False # it's an officially IDA dummy name (see https://hex-rays.com/blog/igors-tip-of-the-week-34-dummy-names/)
    if name.startswith('nullsub_'):
        return False # nullsub_ is not considered a dummy, but it's autogenerated nonetheless
    if name.startswith('staticinit_'):
        return False # staticinit_ is our custom prefix which is functionally a dummy
    if len(name) >= 2 and name[0] == 'a':
        byte = ida_bytes.get_byte(ea)
        if byte < 0x80 and name[1] == chr(byte).upper():
            return False # string constant: foo => aFoo
    return True


def set_custom_name(ea, name, sig):
    """Rename `ea` to `name` unless it already has a custom name, annotating conflicts.

    ea   -- address to rename
    name -- desired global name
    sig  -- falsy, or a mapping with 'sig' and 'sigOffset' keys; when given, it is
            recorded as a comment at `ea` regardless of the rename outcome

    Conflicts (name used elsewhere, rename failure, different pre-existing custom name)
    never raise; they are recorded as comments at `ea` instead.
    """
    if not has_custom_name(ea):
        dupEA = ii.find_global_name_ea(name)
        if dupEA != idaapi.BADADDR:
            msg(f'Skipping rename for {hex(ea)}: same name {name} is already used by {hex(dupEA)}')
            ii.add_comment_ea_auto(ea, f'duplicate name: {name}')
        elif ida_name.set_name(ea, name) == 0:
            # no need to write a message, we get a messagebox anyway...
            ii.add_comment_ea_auto(ea, f'rename failed: {name}')
    elif ida_name.get_ea_name(ea) != name:
        msg(f'Skipping rename for existing name: {hex(ea)} {ida_name.get_ea_name(ea)} -> {name}')
        ii.add_comment_ea_auto(ea, f'alt name: {name}')

    if sig:
        ii.add_comment_ea_auto(ea, f'signature: {sig["sig"]} +{sig["sigOffset"]}')


# ensure specified ea is a function start; returns success (either function already existed or was created)
def ensure_function_at(ea, message):
    """Make sure a function exists at `ea`, creating one if needed.

    `message` is a short description used for logging; pass a falsy value to stay silent.
    Returns a truthy value if a function already covers `ea` or one was created.
    """
    # NOTE(review): ida_funcs.get_func() matches any address inside a function, so an `ea`
    # pointing into the middle of an existing function is also reported as success - confirm
    # whether callers ever pass such addresses.
    if ida_funcs.get_func(ea):
        return True
    added = ida_funcs.add_func(ea)
    if message:
        msg(f'Created new {message} at {hex(ea)}' if added else f'Failed to create {message} at {hex(ea)}')
    return added


def calc_vtable_length(ea):
    """Return the number of 8-byte entries in the pointer table starting at `ea` (always >= 1)."""
    # assume that length is from start until next name
    end = ea + 8
    while ida_name.get_ea_name(end) == '' and ii.ea_has_data_offset(end):
        end += 8
    return (end - ea) >> 3


def convert_exported_sig(sig, name = None):
    """Build a C declaration string from an exported signature.

    `sig` is a mapping with 'retType' and 'arguments' (each argument having 'type' and 'name').
    `name` is the declarator to use; "func" is used when it is falsy.
    """
    args = ', '.join(f'{a["type"]} {a["name"]}' for a in sig['arguments'])
    return f'{sig["retType"]} {name if name else "func"}({args})'


def apply_function_type(ea, tinfo):
    """Apply `tinfo` to the function at `ea` unless it already has a type.

    If a type is already set, it is kept and `tinfo` is recorded as a function comment.
    Raises Exception if there is no function at `ea` or if applying the type fails.
    """
    func = ida_funcs.get_func(ea)
    if not func:
        raise Exception('Function not defined')

    existing = ida_typeinf.print_type(ea, 0)
    if existing:
        msg(f'Skipping function type assignment @ {hex(ea)}: {existing} -> {tinfo}')
        ii.add_comment_func(func, f'alt sig: {tinfo}')
    elif not ida_typeinf.apply_tinfo(ea, tinfo, 0):
        raise Exception('Apply failed')


# return offset of the base of specified type, or None if not found
def find_base_offset(structId, baseId):
    """Search the `baseclass_<offset>` members of struct `structId` (recursively) for `baseId`.

    Returns the byte offset of the base subobject, or None if the struct does not exist,
    `baseId` is BADADDR, or no matching base is found.
    """
    s = ida_struct.get_struc(structId)
    if not s or baseId == idaapi.BADADDR:
        return None

    offset = 0
    while True:
        m = ida_struct.get_member(s, offset)
        # bases are expected to be laid out contiguously from offset 0; stop at the first non-base member
        if not m or ida_struct.get_member_name(m.id) != f'baseclass_{offset}':
            return None
        op = ida_nalt.opinfo_t()
        ida_struct.retrieve_member_info(op, m)
        nestedOffset = 0 if op.tid == baseId else find_base_offset(op.tid, baseId)
        if nestedOffset is not None:
            return offset + nestedOffset
        offset += ida_struct.get_member_size(m)


def populate_static_initializers():
    """Create and name (`staticinit_<n>`) the functions referenced by main's `_initterm` table.

    Any failure is logged and swallowed, so the rest of the script keeps running.
    """
    msg('*** Populating static initializers ***')

    def find_main_initterm():
        """Return the address of the single `_initterm` reference made from `__scrt_common_main*`."""
        eaInitterm = ii.find_global_name_ea('_initterm')
        if eaInitterm == idaapi.BADADDR:
            raise Exception('Failed to find _initterm address')
        # there are several _initterm calls, interesting one is from main, others are purely framework ones
        # get_func_name can yield None for references that are not inside any function - treat those as non-matching
        mainXrefs = [xref for xref in ii.enumerate_xrefs_to(eaInitterm)
                     if (ida_funcs.get_func_name(xref) or '').startswith('?__scrt_common_main')]
        if len(mainXrefs) != 1:
            raise Exception(f'Found {len(mainXrefs)} calls to _initterm from main, 1 expected')
        return mainXrefs[0]

    def parse_initterm_arguments(callEA):
        """Return (start, end) addresses of the table passed to the `_initterm` call at `callEA`."""
        # assume arguments are of the form 'lea rcx/rdx, addr'
        args = ii.get_call_argument_assignment_eas(callEA)
        if len(args) != 2:
            raise Exception(f'Unexpected args for _initterm call: 2 expected, got {len(args)}')
        start = ii.get_instruction_operand_immediate(args[0], 1)
        end = ii.get_instruction_operand_immediate(args[1], 1)
        # both start and end have 0's, between them have function pointers
        if start + 8 * calc_vtable_length(start) != end:
            raise Exception('Unexpected _initterm table contents')
        return (start, end)

    try:
        eaInittermCall = find_main_initterm()
        start, end = parse_initterm_arguments(eaInittermCall)
        for ea in range(start + 8, end, 8):
            func = ida_bytes.get_qword(ea)
            ensure_function_at(func, "static initializer")
            if not has_custom_name(func):
                ida_name.set_name(func, f'staticinit_{(ea - start) >> 3}')
    except Exception as e:
        msg(f'Static initializer error: {e}')


def populate_global_names(data):
    """Name every exported global; `data` maps address -> entry with 'name' and 'address' (signature info)."""
    msg('** Populating exported global names **')
    for ea, g in data.items():
        set_custom_name(ea, g['name'], g['address'])


def populate_function_names(data):
    """Create (if missing) and name every exported function; `data` maps address -> entry with 'name' and 'address'."""
    msg('** Populating exported function names **')
    for ea, g in data.items():
        ensure_function_at(ea, "function") # if function is not referenced from anywhere, define one manually
        set_custom_name(ea, g['name'], g['address'])


def populate_enums(data):
    """Create all exported enums; `data` maps enum name -> entry with 'isBitfield', 'isSigned', 'width', 'values'.

    Per-enum failures are logged and do not stop processing of the remaining enums.
    """
    msg('** Populating exported enums **')

    def populate_enum(name, isBitfield, isSigned, width, values):
        """Create enum `name` and add its members; raises if it already exists or cannot be created."""
        if ida_enum.get_enum(name) != idaapi.BADADDR:
            raise Exception(f'{name} already exists in database')

        eid = ii.add_enum(name, isBitfield, isSigned, width)
        if eid == idaapi.BADADDR:
            raise Exception(f'Failed to create {name}')

        for val in values:
            en = val['name']
            ev = val['value']
            qn = f'{name}.{en}' # enum names in ida are global, so qualify them
            res = ida_enum.add_enum_member(eid, qn, ev, ev if isBitfield else -1)
            if res != 0:
                msg(f'Failed to add enum member {name}.{en} = {ev}: {res}')
                ii.add_comment_enum(eid, f'could not add field {en} = {ev}')

    with ii.mass_type_updater(ida_typeinf.UTP_ENUM):
        for name, e in data.items():
            try:
                populate_enum(name, e['isBitfield'], e['isSigned'], e['width'], e['values'])
            except Exception as err:
                msg(f'Enum error: {err}')


def populate_vtables(data):
    """Name vtable instances, create `<class>_vtbl` structures and name virtual functions.

    `data` maps class name -> entry with 'primaryVTable', 'secondaryVTables' and 'bases'.
    Returns a dict (class name -> Vtable tuple) ordered so that bases precede derived classes.
    """
    # vtable population is done in several passes
    Vtable = collections.namedtuple("VTable", "primaryEA secondaryEAs vFuncs base")
    vtables = {} # base always ordered before derived; key = class name

    # first pass: build an ordered set of classes with vtables (base before derived) and assign names for all known addresses
    # note that we don't immediately calculate vtable size on the off chance that some of the vtables-to-be-renamed has no known xrefs
    def pass1():
        msg('** Populating exported vtables: pass 1 **')

        def populate_vtable(cname):
            if cname in vtables:
                return # class already processed, since it's a base of some earlier-defined class

            cdata = data[cname]
            primary = cdata['primaryVTable']
            secondary = cdata['secondaryVTables']
            bases = cdata['bases']

            # ensure we process all bases first - both direct and indirect (from secondary vtables) - we might not have correct inheritance chain set up
            for b in bases:
                populate_vtable(b['type'])
            for v in secondary:
                populate_vtable(v['base'])

            # if primary base has vtable, this class should have one too
            # TODO: this should be handled during generation
            primaryBase = bases[0]['type'] if bases else None
            if primaryBase and primaryBase in vtables and not primary:
                msg(f'Class {cname} has no primary vtable, but has base {bases[0]["type"]} with one')
                # vFuncs has to be a mapping (index -> info): later passes call .keys()/.items() on it
                primary = { 'ea': 0, 'address': None, 'vFuncs': {} }

            if not primary:
                return # skip, this class has no vtables

            primaryEA = primary['ea']
            if primaryEA != 0:
                set_custom_name(primaryEA, f'vtbl_{cname}', primary['address'])

            for v in secondary:
                secEA = v['ea']
                secBase = v['base']
                set_custom_name(secEA, f'vtbl_{cname}___{secBase}', None)
                if secBase in vtables:
                    vtables[secBase].secondaryEAs.append(secEA)
                else:
                    msg(f'Indirect base {secBase} has no known vtables')

            vtables[cname] = Vtable(primaryEA, [], primary['vFuncs'], primaryBase)

        for name in data:
            populate_vtable(name)

    # second pass: determine vtable sizes and create structures
    def pass2():
        msg('** Populating exported vtables: pass 2 **')

        def common_vtable_length(primaryEA, secondaryEAs):
            """Return the smallest entry count among all known instances of a vtable (0 if there are none)."""
            primaryLen = calc_vtable_length(primaryEA) if primaryEA != 0 else 0
            vlen = primaryLen
            for ea in secondaryEAs:
                secLen = calc_vtable_length(ea)
                if vlen == 0:
                    vlen = secLen
                if primaryLen != 0 and primaryLen != secLen:
                    msg(f'Mismatch between vtable sizes at {hex(primaryEA)} ({primaryLen}) and {hex(ea)} ({secLen})')
                if vlen == 0 or vlen > secLen:
                    vlen = secLen
            return vlen

        def calc_vf_name(vtable, vfuncs, idx):
            """Return the exported name for vfunc `idx` if known and unused in `vtable`, else `vf<idx>`."""
            if idx in vfuncs:
                custom = vfuncs[idx]['name']
                if not ida_struct.get_member_by_name(vtable, custom):
                    return custom
                msg(f'Duplicate vtable field {ida_struct.get_struc_name(vtable.id)}.{custom}, using fallback for {idx}')
            return f'vf{idx}'

        def create_vtable(cname, vtbl):
            vlen = common_vtable_length(vtbl.primaryEA, vtbl.secondaryEAs)
            if vlen == 0:
                return # don't bother creating a vtable if there are no instances

            # create structure
            vtable = ii.add_struct(f'{cname}_vtbl')
            if not vtable:
                raise Exception(f'Failed to create vtable structure for {cname}')

            # add base, if any
            if vtbl.base and not ii.add_struct_baseclass(vtable, f'{vtbl.base}_vtbl'):
                msg(f'Failed to add base for vtable for {cname}')

            # check that all custom vfuncs are in range
            firstNewVF = ida_struct.get_struc_size(vtable) >> 3
            for idx in vtbl.vFuncs.keys():
                if idx < firstNewVF:
                    msg(f'Class {cname} overrides vfunc {idx} inherited from base {vtbl.base}')
                elif idx >= vlen:
                    msg(f'Class {cname} defines vfunc {idx} which is outside bounds ({vlen})')

            # add fields
            for idx in range(firstNewVF, vlen):
                name = calc_vf_name(vtable, vtbl.vFuncs, idx)
                if not ii.add_struct_member_ptr(vtable, idx << 3, name):
                    msg(f'Failed to add vfunc {idx} to vtable {cname}')
            #ida_struct.save_struc(vtable)

        with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
            for cname, vtbl in vtables.items():
                try:
                    create_vtable(cname, vtbl)
                except Exception as e:
                    msg(f'Create vtable error: {e}')

    # third pass: rename functions, add crossrefs for vtable instances
    # the only reason to split it into separate pass is to do it outside mass type update
    def pass3():
        msg('** Populating exported vtables: pass 3 **')

        def create_vtable_instance(vtable, numVFs, ea, prefix, signatures):
            # note: i feel that creating vtable global is, while correct, makes viewing it slightly worse (not seeing vf offsets etc)
            # but at very least create custom xref (so that find-refs on vtable struct works)
            # TODO: reconsider...
            ida_xref.add_dref(ea, vtable.id, ida_xref.XREF_USER | ida_xref.dr_I)
            for idx in range(0, numVFs):
                vfuncEA = ida_bytes.get_qword(ea + idx * 8)
                vfuncName = ida_name.get_ea_name(vfuncEA)
                if vfuncName == "_purecall":
                    continue # abstract virtual function
                inner = ida_struct.get_innermost_member(vtable, idx * 8)
                if not inner:
                    # cname is the class currently being processed by the loop below
                    msg(f'Failed to find field for vfunc {idx} of {cname}')
                    continue
                leafName = ida_struct.get_member_name(inner[0].id)
                if f'.{leafName}' in vfuncName:
                    continue # this function is probably not overridden
                set_custom_name(vfuncEA, f'{prefix}.{leafName}', signatures[idx]['address'] if signatures and idx in signatures else None)

        for cname, vtbl in vtables.items():
            vtable = ii.get_struct_by_name(f'{cname}_vtbl')
            if not vtable:
                continue # structure was skipped (no instances) or failed to be created in pass 2
            numVFs = ida_struct.get_struc_size(vtable) >> 3
            if vtbl.primaryEA != 0:
                create_vtable_instance(vtable, numVFs, vtbl.primaryEA, cname, vtbl.vFuncs)
            for ea in vtbl.secondaryEAs:
                create_vtable_instance(vtable, numVFs, ea, ida_name.get_ea_name(ea)[5:], None) # remove vtbl_ prefix, leaving derived___cname

    pass1()
    pass2()
    pass3()
    return vtables


def populate_structs(data):
    """Create all exported structures with their bases, vtable pointers, fields and tail padding.

    `data` maps class name -> entry with 'bases', 'fields', 'primaryVTable' and 'size'.
    Problems are logged; a structure that fails to be created is skipped in the second pass.
    """
    # structure creation is done in two passes
    res = {} # base/substruct always ordered before referencing struct; key = class name

    # first pass: build an ordered set of structures (base/subfield before containing structures) and create empty structs
    # these empty structs can be used as a kind of 'forward declarations' for pointers
    def pass1():
        msg('** Populating exported structs: pass 1 **')

        def populate_struct(cname):
            if cname in res:
                return # class already processed, since it's a base/substruct of some earlier-defined class

            cdata = data[cname]
            # ensure we process all bases and struct fields first
            for b in cdata['bases']:
                populate_struct(b['type'])
            for f in cdata['fields']:
                if f['isStruct']:
                    populate_struct(f['type'])

            res[cname] = cdata # tbd
            # add struct
            s = ii.add_struct(cname)
            if not s:
                raise Exception(f'Failed to create structure {cname}')

        with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
            for name in data:
                try:
                    populate_struct(name)
                except Exception as e:
                    msg(f'Struct create error: {e}')

    # second pass: fill structure bases and fields
    def pass2():
        msg('** Populating exported structs: pass 2 **')

        def add_base(s, baseType, offset, size):
            """Append base class `baseType` to `s`, warning if its offset or size differs from the export."""
            curSize = ida_struct.get_struc_size(s)
            if curSize != offset:
                # treat this as a warning...
                msg(f'Unexpected offset for {ida_struct.get_struc_name(s.id)} base {baseType}: expected {hex(offset)}, got {hex(curSize)}')

            if not ii.add_struct_baseclass(s, baseType):
                msg(f'Failed to add {ida_struct.get_struc_name(s.id)} base {baseType}')
                return

            actualSize = ida_struct.get_member_size(ida_struct.get_member(s, curSize))
            if actualSize != size:
                msg(f'Unexpected size for {ida_struct.get_struc_name(s.id)} base {baseType}: expected {hex(size)}, got {hex(actualSize)}')

        def add_vptr(s, vtname):
            """Add `__vftable` at offset 0, typed as `vtname*` when that struct exists and `void*` otherwise."""
            if not ii.add_struct_member_ptr(s, 0, "__vftable"):
                msg(f'Failed to add vtable pointer to {ida_struct.get_struc_name(s.id)}')
            elif not ii.set_struct_member_by_offset_type(s, 0, vtname + '*' if ida_struct.get_struc_id(vtname) != idaapi.BADADDR else 'void*'):
                msg(f'Failed to set vtable pointer type for {ida_struct.get_struc_name(s.id)} (vtbl-struct-id={hex(ida_struct.get_struc_id(vtname))})')

        def add_field(s, offset, fdata, checkSize):
            """Add the field described by `fdata` to `s` at `offset`; optionally verify its resulting size."""
            name = fdata['name']
            ftype = fdata['type']
            arrLen = fdata['arrayLength']
            if fdata['isStruct']:
                success = ii.add_struct_member_substruct(s, offset, name, ftype, arrLen if arrLen > 0 else 1)
            else:
                typeSuffix = f'[{arrLen}]' if arrLen > 0 else ''
                success = ii.add_struct_member_typed(s, offset, name, ftype + typeSuffix)

            if not success:
                msg(f'Failed to add field {ida_struct.get_struc_name(s.id)}.{name}')
            elif checkSize:
                actualSize = ida_struct.get_member_size(ida_struct.get_member(s, offset))
                expectedSize = fdata['size']
                if actualSize != expectedSize:
                    msg(f'Unexpected size for {ida_struct.get_struc_name(s.id)}.{name}: expected {hex(expectedSize)}, got {hex(actualSize)}')

        with ii.mass_type_updater(ida_typeinf.UTP_STRUCT):
            for cname, cdata in res.items():
                s = ii.get_struct_by_name(cname)
                if not s:
                    continue

                # start with bases (if any)
                for b in cdata['bases']:
                    add_base(s, b['type'], b['offset'], b['size'])

                # now add primary vtable, if needed
                if ida_struct.get_struc_size(s) == 0 and cdata['primaryVTable']:
                    add_vptr(s, cname + '_vtbl')

                # now add fields
                # fields sharing an offset are wrapped into a union; groupby only merges adjacent entries,
                # so this presumably relies on the exported field list being ordered by offset
                for offset, fgroup in itertools.groupby(cdata['fields'], lambda f: f['offset']):
                    if offset < ida_struct.get_struc_size(s):
                        msg(f'Unexpected offset for {cname}+{hex(offset)}, current size if {ida_struct.get_struc_size(s)}')
                        continue
                    flist = list(fgroup) # group can only be iterated over once
                    if len(flist) == 1:
                        add_field(s, offset, flist[0], True)
                    else:
                        uname = f'union{hex(offset)[2:]}'
                        su = ii.add_struct(f'{cname}_{uname}', True)
                        for f in flist:
                            add_field(su, 0, f, False)
                        ii.add_struct_member_substruct(s, offset, uname, f'{cname}_{uname}')

                # add tail, if structure is larger than last field
                finalSize = ida_struct.get_struc_size(s)
                expectedSize = cdata['size']
                if finalSize > expectedSize:
                    msg(f'Structure {cname} is too large: {hex(finalSize)} > {hex(expectedSize)}')
                elif finalSize < expectedSize and not ii.add_struct_member_byte(s, finalSize, f'tail_{hex(finalSize)[2:]}', expectedSize - finalSize):
                    msg(f'Failed to extend structure {cname}')

    pass1()
    pass2()


def populate_global_types(data):
    """Apply exported types to globals; `data` maps address -> entry with 'type', 'name' and 'size'.

    A global that starts inside the range covered by a previously typed global is skipped,
    which presumably relies on `data` being ordered by address.
    """
    msg('** Populating exported global types **')

    def process_global(ea, typeDecl, expectedSize):
        """Apply `typeDecl` at `ea`; returns the number of bytes consumed (0 if nothing was applied)."""
        if not typeDecl:
            return 0 # nothing to do, type unknown

        tif = ii.parse_cdecl(typeDecl)
        if not tif:
            raise Exception(f'Failed to parse type {typeDecl}')

        actualSize = tif.get_size()
        if actualSize != expectedSize:
            msg(f'Mismatched global size {typeDecl} @ {hex(ea)}: expected {hex(expectedSize)}, got {hex(actualSize)}')

        for nameEA, name in ii.enumerate_names():
            if ea < nameEA < ea + actualSize:
                msg(f'Existing global {name} is now a part of global {typeDecl} @ {hex(ea)} at offset {hex(nameEA - ea)}')

        if not ida_typeinf.apply_tinfo(ea, tif, 0):
            msg(f'Failed to apply {typeDecl} @ {hex(ea)}')
            return 0
        return actualSize

    minEA = 0
    for ea, g in data.items():
        if ea < minEA:
            msg(f'Skipping global {g["type"]} {g["name"]} @ {hex(ea)}, since it is a part of another global')
            continue # this global was already consumed by another global

        minEA = ea
        try:
            minEA += process_global(ea, g['type'], g['size'])
        except Exception as e:
            msg(f'Global type error: {e}')


def populate_function_types(data):
    """Apply exported signatures to functions; `data` maps address -> entry with an optional 'signature'."""
    msg('** Populating exported function types **')
    for ea, f in data.items():
        sig = f['signature']
        if not sig:
            continue

        tif = ii.parse_cdecl(convert_exported_sig(sig))
        try:
            # a falsy result is treated as a parse failure, same as for global types
            if not tif:
                raise Exception('Failed to parse signature')
            apply_function_type(ea, tif)
        except Exception as e:
            msg(f'Failed to apply function type {tif} @ {hex(ea)}: {e}')


def populate_vfunc_types(vtables):
    """Type vtable structure fields from exported signatures and propagate them to the implementations.

    `vtables` is the mapping returned by populate_vtables().
    """
    msg('** Populating exported virtual function types **')

    def update_vtable_fields(cname, vtable, vtbl):
        """Set function-pointer types on the `vtable` struct members that have exported signatures."""
        for idx, vfunc in vtbl.vFuncs.items():
            sig = vfunc['signature']
            if not sig:
                continue

            m = ida_struct.get_member(vtable, idx * 8)
            # skip slots that resolve to the embedded base vtable rather than to a field of this struct
            if not m or ida_struct.get_member_name(m.id) == 'baseclass_0':
                continue

            ptrType = convert_exported_sig(sig, '(*)')
            if not ii.set_struct_member_type(vtable, m, ptrType):
                msg(f'Failed to set vtable {cname} entry #{idx} type to {ptrType}')

    def propagate_vfunc_type(eaRef, tinfo, cname, shift):
        """Apply `tinfo` to the function pointed to by the vtable slot at `eaRef`, retyping `this`.

        shift == 0 -- `this` becomes `cname*`
        shift > 0  -- `this` keeps its type, annotated as `__shifted(cname, shift)`
        shift < 0  -- `this` is left unchanged
        """
        vfuncEA = ida_bytes.get_qword(eaRef)
        vfuncName = ida_name.get_ea_name(vfuncEA)
        if vfuncName == "_purecall":
            return # abstract virtual function

        # replace 'this' pointer type with proper one
        try:
            fi = ida_typeinf.func_type_data_t()
            if not tinfo.get_func_details(fi):
                raise Exception('Failed to get func details')
            elif fi.size() == 0:
                raise Exception('Func has 0 args')
            elif fi[0].name != 'this':
                raise Exception(f'First arg is not this: {fi[0].name}')
            elif not fi[0].type.is_ptr():
                raise Exception(f'First arg has unexpected type {fi[0].type}')

            if shift == 0:
                fi[0].type = ii.parse_cdecl(cname + '*')
            elif shift > 0:
                fi[0].type = ii.parse_cdecl(f'{fi[0].type} __shifted({cname}, {shift})')
            # else: failed to find base, keep base*

            tifAdj = ida_typeinf.tinfo_t()
            if not tifAdj.create_func(fi):
                raise Exception(f'Failed to build updated tinfo: {tifAdj}')

            apply_function_type(vfuncEA, tifAdj)
        except Exception as e:
            msg(f'Failed to apply virtual function type {tinfo} @ {hex(vfuncEA)}: {e}')

    def propagate_types_to_instances(cname, vtable, vtbl):
        """Push each typed `vtable` field to the matching slot of every known instance of the vtable."""
        numVFs = ida_struct.get_struc_size(vtable) >> 3
        for idx in range(numVFs):
            inner = ida_struct.get_innermost_member(vtable, idx * 8)
            itype = ii.get_struct_member_tinfo(inner[0]) if inner else None
            if not itype or not itype.is_funcptr():
                continue
            itype = ida_typeinf.remove_pointer(itype)

            if vtbl.primaryEA != 0:
                propagate_vfunc_type(vtbl.primaryEA + idx * 8, itype, cname, 0)
            for ea in vtbl.secondaryEAs:
                derivedName = ida_name.get_ea_name(ea)[5:-3-len(cname)] # remove vtbl_ prefix and ___cname suffix, leaving derived
                shift = find_base_offset(ida_struct.get_struc_id(derivedName), ida_struct.get_struc_id(cname))
                propagate_vfunc_type(ea + idx * 8, itype, derivedName, shift if shift is not None else -1)

    for cname, vtbl in vtables.items():
        vtable = ii.get_struct_by_name(f'{cname}_vtbl')
        if not vtable:
            continue
        update_vtable_fields(cname, vtable, vtbl)
        propagate_types_to_instances(cname, vtable, vtbl)


def populate_exported(yamlName):
    """Load `yamlName` (resolved relative to this script's directory) and populate the database from it.

    The document is expected to have 'globals', 'functions', 'enums' and 'structs' sections.
    """
    msg(f'*** Populating exported items from {yamlName} ***')
    with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), yamlName), 'r') as fd:
        data = yaml.safe_load(fd)
    populate_global_names(data['globals'])
    populate_function_names(data['functions'])
    populate_enums(data['enums'])
    vtables = populate_vtables(data['structs'])
    populate_structs(data['structs'])
    populate_global_types(data['globals'])
    populate_function_types(data['functions'])
    populate_vfunc_types(vtables)


populate_static_initializers()
populate_exported('info.yml')
msg('*** Finished! ***')