diff options
author | Bryan Bishop <kanzure@gmail.com> | 2012-06-21 03:37:13 -0500 |
---|---|---|
committer | Bryan Bishop <kanzure@gmail.com> | 2012-06-21 03:37:13 -0500 |
commit | 0507c67bc8a65f52936ba6f9aa01f4a89cf94695 (patch) | |
tree | ebbd7658da7602fa18a55e40fcfc5c810ac96bdc | |
parent | f18eff8cdaf3d1b3d546a36750095913bcb82cfa (diff) |
graph.py - parse pokecrystal into a function graph for d3.js
original-commit-id: 3359121ba732f702fa3dbbc06357e3b5085a9067
-rw-r--r-- | graph.py | 143 | ||||
-rw-r--r-- | romstr.py | 25 |
2 files changed, 163 insertions, 5 deletions
diff --git a/graph.py b/graph.py new file mode 100644 index 0000000..c5b3f40 --- /dev/null +++ b/graph.py @@ -0,0 +1,143 @@ +#!/usr/bin/python +# author: Bryan Bishop <kanzure@gmail.com> +# date: 2012-06-20 + +import networkx as nx + +from romstr import RomStr, DisAsm, \ + relative_jumps, call_commands, \ + relative_unconditional_jumps + +class RomGraph(nx.DiGraph): + """ Graphs various functions pointing to each other. + + TODO: Bank switches are nasty. They should be detected. Otherwise, + functions will point to non-functions within the same bank. Another way + to detect bankswitches is retroactively. By disassembling one function + after another within the function banks, it can be roughly assumed that + anything pointing to something else (within the same bank) is really + actually a bankswitch. An even better method to handle bankswitches + would be to just detect those situations in the asm (but I presently + forget how bankswitches are performed in pokecrystal). + """ + + # some areas shouldn't be parsed as asm + exclusions = [] + + # where is the first function located? + start_address = 0x150 + + # and where is a good place to stop? + end_address = 0x4000 * 0x01 # only do the first bank? sure.. + + # where is the rom stored? + rompath = "../baserom.gbc" + + def __init__(self, rom=None, **kwargs): + """ Loads and parses the ROM into a function graph. + """ + # continue the initialization + nx.DiGraph.__init__(self, **kwargs) + + # load the graph + if rom == None: + self.load_rom() + else: + self.rom = rom + + # start parsing the ROM + self.parse() + + def load_rom(self): + """ Creates a RomStr from rompath. + """ + file_handler = open(self.rompath, "r") + self.rom = RomStr(file_handler.read()) + file_handler.close() + + def parse(self): + """ Parses the ROM starting with the first function address. Each + function is disassembled and parsed to find where else it leads to. + """ + functions = {} + + address = self.start_address + + other_addresses = set() + + count = 0 + + while True: + if count > 100: + break + + if address < self.end_address and address not in functions.keys(): + # address is okay to parse at, keep going + pass + elif len(other_addresses) > 0: + # parse some other address possibly in a remote bank + address = other_addresses.pop() + else: + # no more addresses detected- exit loop + break + + # parse the asm + func = self.rom.to_asm(address) + + # store this parsed function + functions[address] = func + + # where does this function jump to? + used_addresses = set(func.used_addresses()) + + # add this information to the graph + for used_address in used_addresses: + # only add this remote address if it's not yet parsed + if used_address not in functions.keys(): + other_addresses.update([used_address]) + + # add this other address to the graph + self.add_node(used_address) + + # add this as an edge between the two nodes + self.add_edge(address, used_address) + + # setup the next function to be parsed + address = func.last_address + + count += 1 + + self.functions = functions + + def pretty_printer(self): + """ Shows some text output describing which nodes point to which other + nodes. + """ + print self.edges() + + def to_d3(self): + """ Exports to d3.js because we're gangster like that. + """ + import networkx.readwrite.json_graph as json_graph + content = json_graph.dumps(self) + fh = open("graphs.json", "w") + fh.write(content) + fh.close() + +class RedGraph(RomGraph): + """ Not implemented. Go away. + """ + + rompath = "../pokered-baserom.gbc" + +class CryGraph(RomGraph): + exclusions = [ + [0x000, 0x149], + ] + + rompath = "../baserom.gbc" + +if __name__ == "__main__": + crygraph = CryGraph() + crygraph.pretty_printer() + crygraph.to_d3() @@ -11,9 +11,9 @@ end_08_scripts_with = [ 0xe9, # jp hl 0xc9, # ret ] # possibly also: - # 0xc3, # jp + # 0xc3, # jp # 0xc18, # jr - # 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0xc3, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8, + # 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8, # 0xd0, 0xc0, 0xc8, 0xc9 spacing = "\t" @@ -124,7 +124,7 @@ class RomStr(str): that will be parsed, so that large patches of data aren't parsed as code. """ - if "0x" in address: + if type(address) == str and "0x" in address: address = int(address, 16) start_address = address @@ -302,6 +302,7 @@ class DisAsm: opstr2 = base_opstr[:base_opstr.find("x")].lower() + insertion + base_opstr[base_opstr.find("x")+1:].lower() asm_command["formatted_with_labels"] = opstr2 + asm_command["target_address"] = target_address current_byte_number += 1 offset += 1 @@ -331,6 +332,7 @@ class DisAsm: opstr2 = base_opstr[:base_opstr.find("?")].lower() + insertion + base_opstr[base_opstr.find("?")+1:].lower() asm_command["formatted_with_labels"] = opstr2 + asm_command["target_address"] = target_address current_byte_number += 2 offset += 2 @@ -423,19 +425,32 @@ class DisAsm: offset += 1 # also save the last command if necessary - if asm_commands[asm_commands.keys()[-1]] is not asm_command: + if len(asm_commands.keys()) > 0 and asm_commands[asm_commands.keys()[-1]] is not asm_command: asm_commands[asm_command["address"]] = asm_command # store the set of commands on this object self.asm_commands = asm_commands - self.end_address = offset + 1 + self.end_address = offset + 1 + self.last_address = self.end_address def has_outstanding_labels(self, asm_commands, offset): """ Checks if there are any labels that haven't yet been created. """ # is this really necessary?? return False + def used_addresses(self): + """ Returns a list of unique addresses that this function will probably + call. + """ + addresses = set() + + for (id, command) in self.asm_commands.items(): + if command.has_key("target_address"): + addresses.add(command["target_address"]) + + return addresses + def __str__(self): """ ASM pretty printer. """ |