diff options
author | Sanky <gsanky@gmail.com> | 2013-02-02 23:13:32 +0100 |
---|---|---|
committer | Sanky <gsanky@gmail.com> | 2013-02-02 23:20:50 +0100 |
commit | 4089cf486d83d78fddaab98316952e1bbeaafd47 (patch) | |
tree | f2721b81775a3e450e1a301a4679c9d6159c22b3 /romstr.py | |
parent | 43546fd07ac588be694c697873a642c5a2cc8c1d (diff) | |
parent | 1e893fae740cd9f82ead9adcb6f5c1ccb1c6090b (diff) |
Merge https://github.com/kanzure/pokecrystal
Conflicts:
constants.asm
extras/crystal.py
main.asm
original-commit-id: 7df002c3e20f1b728b1d29a877c3731d4867f502
Diffstat (limited to 'romstr.py')
-rw-r--r-- | romstr.py | 354 |
1 files changed, 21 insertions, 333 deletions
@@ -1,8 +1,21 @@ -import sys, os, time, datetime, json -from gbz80disasm import opt_table +# -*- coding: utf-8 -*- + +import sys +import os +import time +import datetime from ctypes import c_int8 -from copy import copy, deepcopy -from labels import get_label_from_line, get_address_from_line_comment +from copy import copy +import json + +# New versions of json don't have read anymore. +if not hasattr(json, "read"): + json.read = json.loads + +from labels import ( + get_label_from_line, + get_address_from_line_comment, +) relative_jumps = [0x38, 0x30, 0x20, 0x28, 0x18, 0xc3, 0xda, 0xc2, 0x32] relative_unconditional_jumps = [0xc3, 0x18] @@ -91,7 +104,7 @@ class RomStr(str): file_handler.close() # load the labels from the file - self.labels = json.loads(open(filename, "r").read()) + self.labels = json.read(open(filename, "r").read()) def get_address_for(self, label): """ Returns the address of a label. This is slow and could be improved @@ -137,7 +150,7 @@ class RomStr(str): that will be parsed, so that large patches of data aren't parsed as code. """ - if type(address) == str and "0x" in address: + if type(address) in [str, unicode] and "0x" in address: address = int(address, 16) start_address = address @@ -166,333 +179,8 @@ class RomStr(str): elif end_address != None and size == None: size = end_address - start_address - return DisAsm(start_address=start_address, end_address=end_address, size=size, max_size=max_size, debug=debug, rom=self) - -class DisAsm: - """ z80 disassembler - """ - - def __init__(self, start_address=None, end_address=None, size=None, max_size=0x4000, debug=True, rom=None): - assert start_address != None, "start_address must be given" - - if rom == None: - file_handler = open("../baserom.gbc", "r") - bytes = file_handler.read() - file_handler.close() - rom = RomStr(bytes) - - if debug not in [None, True, False]: - raise Exception, "debug param is invalid" - if debug == None: - debug = False - - # get end_address and size in sync with each other - if end_address == None and size != None: - end_address = start_address + size - elif end_address != None and size == None: - size = end_address - start_address - elif end_address != None and size != None: - size = max(end_address - start_address, size) - end_address = start_address + size - - # check that the bounds make sense - if end_address != None: - if end_address <= start_address: - raise Exception, "end_address is out of bounds" - elif (end_address - start_address) > max_size: - raise Exception, "end_address goes beyond max_size" - - # check more edge cases - if not start_address >= 0: - raise Exception, "start_address must be at least 0" - elif end_address != None and not end_address >= 0: - raise Exception, "end_address must be at least 0" - - self.rom = rom - self.start_address = start_address - self.end_address = end_address - self.size = size - self.max_size = max_size - self.debug = debug - - self.parse() - - def parse(self): - """ Disassembles stuff and things. - """ - - rom = self.rom - start_address = self.start_address - end_address = self.end_address - max_size = self.max_size - debug = self.debug - - bank_id = start_address / 0x4000 - - # [{"command": 0x20, "bytes": [0x20, 0x40, 0x50], - # "asm": "jp $5040", "label": "Unknown5040"}] - asm_commands = {} - - offset = start_address - - last_hl_address = None - last_a_address = None - used_3d97 = False - - keep_reading = True - - while (end_address != 0 and offset <= end_address) or keep_reading: - # read the current opcode byte - current_byte = ord(rom[offset]) - current_byte_number = len(asm_commands.keys()) - - # setup this next/upcoming command - if offset in asm_commands.keys(): - asm_command = asm_commands[offset] - else: - asm_command = {} - - asm_command["address"] = offset - - if not "references" in asm_command.keys(): - # This counts how many times relative jumps reference this - # byte. This is used to determine whether or not to print out a - # label later. - asm_command["references"] = 0 - - # some commands have two opcodes - next_byte = ord(rom[offset+1]) - - if self.debug: - print "offset: \t\t" + hex(offset) - print "current_byte: \t\t" + hex(current_byte) - print "next_byte: \t\t" + hex(next_byte) - - # all two-byte opcodes also have their first byte in there somewhere - if (current_byte in opt_table.keys()) or ((current_byte + (next_byte << 8)) in opt_table.keys()): - # this might be a two-byte opcode - possible_opcode = current_byte + (next_byte << 8) - - # check if this is a two-byte opcode - if possible_opcode in opt_table.keys(): - op_code = possible_opcode - else: - op_code = current_byte - - op = opt_table[op_code] - - opstr = op[0].lower() - optype = op[1] - - if self.debug: - print "opstr: " + opstr - - asm_command["type"] = "op" - asm_command["id"] = op_code - asm_command["format"] = opstr - asm_command["opnumberthing"] = optype - - opstr2 = None - base_opstr = copy(opstr) - - if "x" in opstr: - for x in range(0, opstr.count("x")): - insertion = ord(rom[offset + 1]) - - # Certain opcodes will have a local relative jump label - # here instead of a raw hex value, but this is - # controlled through asm output. - insertion = "$" + hex(insertion)[2:] - - opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - - if op_code in relative_jumps: - target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value - insertion = "asm_" + hex(target_address) - - if str(target_address) in self.rom.labels.keys(): - insertion = self.rom.labels[str(target_address)] - - opstr2 = base_opstr[:base_opstr.find("x")].lower() + insertion + base_opstr[base_opstr.find("x")+1:].lower() - asm_command["formatted_with_labels"] = opstr2 - asm_command["target_address"] = target_address - - current_byte_number += 1 - offset += 1 - - if "?" in opstr: - for y in range(0, opstr.count("?")): - byte1 = ord(rom[offset + 1]) - byte2 = ord(rom[offset + 2]) - - number = byte1 - number += byte2 << 8; - - # In most cases, you can use a label here. Labels will - # be shown during asm output. - insertion = "$%.4x" % (number) - - opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() - - # This version of the formatted string has labels. In - # the future, the actual labels should be parsed - # straight out of the "main.asm" file. - target_address = number % 0x4000 - insertion = "asm_" + hex(target_address) - - if str(target_address) in self.rom.labels.keys(): - insertion = self.rom.labels[str(target_address)] - - opstr2 = base_opstr[:base_opstr.find("?")].lower() + insertion + base_opstr[base_opstr.find("?")+1:].lower() - asm_command["formatted_with_labels"] = opstr2 - asm_command["target_address"] = target_address - - current_byte_number += 2 - offset += 2 - - # Check for relative jumps, construct the formatted asm line. - # Also set the usage of labels. - if current_byte in [0x18, 0x20] + relative_jumps: # jr or jr nz - # generate a label for the byte we're jumping to - target_address = offset + 1 + c_int8(ord(rom[offset])).value - - if target_address in asm_commands.keys(): - asm_commands[target_address]["references"] += 1 - remote_label = "asm_" + hex(target_address) - asm_commands[target_address]["current_label"] = remote_label - asm_command["remote_label"] = remote_label - - # Not sure how to set this, can't be True because an - # address referenced multiple times will use a label - # despite the label not necessarily being used in the - # output. The "use_remote_label" values should be - # calculated when rendering the asm output, based on - # which addresses and which op codes will be displayed - # (within the range). - asm_command["use_remote_label"] = "unknown" - else: - remote_label = "asm_" + hex(target_address) - - # This remote address might not be part of this - # function. - asm_commands[target_address] = { - "references": 1, - "current_label": remote_label, - "address": target_address, - } - # Also, target_address can be negative (before the - # start_address that the user originally requested), - # and it shouldn't be shown on asm output because the - # intermediate bytes (between a negative target_address - # and start_address) won't be disassembled. - - # Don't know yet if this remote address is part of this - # function or not. When the remote address is not part - # of this function, the label name should not be used, - # because that label will not be disassembled in the - # output, until the user asks it to. - asm_command["use_remote_label"] = "unknown" - asm_command["remote_label"] = remote_label - elif current_byte == 0x3e: - last_a_address = ord(rom[offset + 1]) - - # store the formatted string for the output later - asm_command["formatted"] = opstr - - if current_byte == 0x21: - last_hl_address = byte1 + (byte2 << 8) - - # this is leftover from pokered, might be meaningless - if current_byte == 0xcd: - if number == 0x3d97: - used_3d97 = True - - if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: - if current_byte == 0xc3: - if number == 0x3d97: - used_3d97 = True - - # stop reading at a jump, relative jump or return - if current_byte in end_08_scripts_with: - is_data = False - - if not self.has_outstanding_labels(asm_commands, offset): - keep_reading = False - break - else: - keep_reading = True - else: - keep_reading = True - - else: - # This shouldn't really happen, and means that this area of the - # ROM probably doesn't represent instructions. - asm_command["type"] = "data" # db - asm_command["value"] = current_byte - keep_reading = False - - # save this new command in the list - asm_commands[asm_command["address"]] = asm_command - - # jump forward by a byte - offset += 1 - - # also save the last command if necessary - if len(asm_commands.keys()) > 0 and asm_commands[asm_commands.keys()[-1]] is not asm_command: - asm_commands[asm_command["address"]] = asm_command - - # store the set of commands on this object - self.asm_commands = asm_commands - - self.end_address = offset + 1 - self.last_address = self.end_address - - def has_outstanding_labels(self, asm_commands, offset): - """ Checks if there are any labels that haven't yet been created. - """ # is this really necessary?? - return False - - def used_addresses(self): - """ Returns a list of unique addresses that this function will probably - call. - """ - addresses = set() - - for (id, command) in self.asm_commands.items(): - if command.has_key("target_address") and command["id"] in call_commands: - addresses.add(command["target_address"]) - - return addresses - - def __str__(self): - """ ASM pretty printer. - """ - output = "" - - for (key, line) in self.asm_commands.items(): - # skip anything from before the beginning - if key < self.start_address: - continue - - # show a label - if line["references"] > 0 and "current_label" in line.keys(): - if line["address"] == self.start_address: - output += "thing: ; " + hex(line["address"]) + "\n" - else: - output += "." + line["current_label"] + "\@ ; " + hex(line["address"]) + "\n" - - # show the actual line - if line.has_key("formatted_with_labels"): - output += spacing + line["formatted_with_labels"] - elif line.has_key("formatted"): - output += spacing + line["formatted"] - #output += " ; to " + - output += "\n" - - # show the next address after this chunk - output += "; " + hex(self.end_address) - - return output + raise NotImplementedError("DisAsm was removed and never worked; hook up another disassembler please.") + #return DisAsm(start_address=start_address, end_address=end_address, size=size, max_size=max_size, debug=debug, rom=self) class AsmList(list): """ Simple wrapper to prevent all asm lines from being shown on screen. |