diff options
| -rw-r--r-- | pokemontools/__init__.py | 1 | ||||
| -rw-r--r-- | pokemontools/config.py | 46 | ||||
| -rw-r--r-- | pokemontools/exceptions.py | 15 | ||||
| -rw-r--r-- | pokemontools/gbz80disasm.py | 650 | ||||
| -rw-r--r-- | pokemontools/labels.py | 30 | ||||
| -rw-r--r-- | pokemontools/preprocessor.py | 414 | ||||
| -rw-r--r-- | pokemontools/wram.py | 139 | 
7 files changed, 719 insertions, 576 deletions
| diff --git a/pokemontools/__init__.py b/pokemontools/__init__.py index 8fb8b19..09331af 100644 --- a/pokemontools/__init__.py +++ b/pokemontools/__init__.py @@ -1,2 +1,3 @@ +import config  import crystal  import preprocessor diff --git a/pokemontools/config.py b/pokemontools/config.py new file mode 100644 index 0000000..656fab0 --- /dev/null +++ b/pokemontools/config.py @@ -0,0 +1,46 @@ +""" +Configuration +""" + +import os + +import exceptions + +class Config(object): +    """ +    The Config class handles all configuration for pokemontools. Other classes +    and functions use a Config object to determine where expected files can be +    located. +    """ + +    def __init__(self, **kwargs): +        """ +        Store all parameters. +        """ +        self._config = {} + +        for (key, value) in kwargs.items(): +            if key not in self.__dict__: +                self._config[key] = value +            else: +                raise exceptions.ConfigException( +                    "Can't store \"{0}\" in configuration because the key conflicts with an existing property." +                    .format(key) +                ) + +        if "path" not in self._config: +            self._config["path"] = os.getcwd() + +    def __getattr__(self, key): +        """ +        Grab the value from the class properties, then check the configuration, +        and raise an exception if nothing works. +        """ +        if key in self.__dict__: +            return self.__dict__[key] +        elif key in self._config: +            return self._config[key] +        else: +            raise exceptions.ConfigException( +                "no config found for \"{0}\"".format(key) +            ) diff --git a/pokemontools/exceptions.py b/pokemontools/exceptions.py index 71d0da2..4de62eb 100644 --- a/pokemontools/exceptions.py +++ b/pokemontools/exceptions.py @@ -11,3 +11,18 @@ class TextScriptException(Exception):      """      TextScript encountered an inconsistency or problem.      """ + +class ConfigException(Exception): +    """ +    Configuration error. Maybe a missing config variable. +    """ + +class PreprocessorException(Exception): +    """ +    There was a problem in the preprocessor. +    """ + +class MacroException(PreprocessorException): +    """ +    There was a problem with a macro. +    """ diff --git a/pokemontools/gbz80disasm.py b/pokemontools/gbz80disasm.py index 7499982..790388e 100644 --- a/pokemontools/gbz80disasm.py +++ b/pokemontools/gbz80disasm.py @@ -1,4 +1,7 @@  # -*- coding: utf-8 -*- +""" +GBC disassembler +"""  import os  import sys @@ -7,22 +10,15 @@ from ctypes import c_int8  import random  import json -from wram import * +import config +import crystal +import labels +import wram  # New versions of json don't have read anymore.  if not hasattr(json, "read"):      json.read = json.loads -def load_rom(filename="../baserom.gbc"): -    """ -    Load the specified rom. - -    If no rom is given, load "../baserom.gbc". -    """ -    global rom -    rom = bytearray(open(filename,'rb').read()) -    return rom -  spacing = "\t"  temp_opt_table = [ @@ -563,49 +559,6 @@ relative_unconditional_jumps = [0xc3, 0x18]  call_commands = [0xdc, 0xd4, 0xc4, 0xcc, 0xcd] -all_labels = {} -def load_labels(filename="labels.json"): -    """ -    Load labels from specified file. - -    If no filename is given, loads 'labels.json'. -    """ -    global all_labels - -    # don't re-load labels each time -    if all_labels != {}: -        return - -    if os.path.exists(filename): -        all_labels = json.read(open(filename, "r").read()) -    else: -        print "You must run crystal.scan_for_predefined_labels() to create \"labels.json\". Trying..." -        import crystal -        crystal.scan_for_predefined_labels() - -def find_label(local_address, bank_id=0): -    # keep an integer -    if type(local_address) == str: -        local_address = int(local_address.replace("$", "0x"), 16) - -    if local_address < 0x8000: -        for label_entry in all_labels: -            if get_local_address(label_entry["address"]) == local_address: -                if label_entry["bank"] == bank_id or label_entry["bank"] == 0: -                    return label_entry["label"] -    if local_address in wram_labels.keys(): -        return wram_labels[local_address][-1] -    for constants in [gbhw_constants, hram_constants]: -        if local_address in constants.keys() and local_address >= 0xff00: -            return constants[local_address] -    return None - -def find_address_from_label(label): -    for label_entry in all_labels: -        if label == label_entry["label"]: -            return label_entry["address"] -    return None -  def asm_label(address):      """      Return the ASM label using the address. @@ -627,320 +580,369 @@ def get_global_address(address, bank):      return ".ASM_" + hex(address)[2:] -def output_bank_opcodes(original_offset, max_byte_count=0x4000, include_last_address=True, stop_at=[], debug=False): +def has_outstanding_labels(byte_labels):      """ -    Output bank opcodes. - -    fs = current_address -    b = bank_byte -    in = input_data  -- rom -    bank_size = byte_count -    i = offset -    ad = end_address -    a, oa = current_byte_number - -    stop_at can be used to supply a list of addresses to not disassemble -    over. This is useful if you know in advance that there are a lot of -    fall-throughs. +    Check whether a label is used once in the asm output. + +    If so, then that means it has to be called or specified later.      """ +    for label_line in byte_labels.keys(): +        real_line = byte_labels[label_line] +        if real_line["definition"] == False: return True +    return False -    load_labels() -    load_rom() - -    bank_id = original_offset / 0x4000 -    if debug: print "bank id is: " + str(bank_id) - -    last_hl_address = None #for when we're scanning the main map script -    last_a_address = None -    used_3d97 = False - -    global rom -    offset = original_offset -    current_byte_number = 0 #start from the beginning - -    #we don't actually have an end address, but we'll just say $4000 -    end_address = original_offset + max_byte_count - -    byte_labels = {} -    data_tables = {} - -    first_loop = True -    output = "" -    keep_reading = True -    is_data = False -    while offset <= end_address and keep_reading: -        current_byte = rom[offset] -        maybe_byte = current_byte - -        # stop at any address -        if not first_loop and offset in stop_at: -            keep_reading = False -            break - -        #first check if this byte already has a label -        #if it does, use the label -        #if not, generate a new label -        if offset in byte_labels.keys(): -            line_label = byte_labels[offset]["name"] -            byte_labels[offset]["usage"] += 1 -            output += "\n" -        else: -            line_label = asm_label(offset) -            byte_labels[offset] = {} -            byte_labels[offset]["name"] = line_label -            byte_labels[offset]["usage"] = 0 -        byte_labels[offset]["definition"] = True -        output += line_label + "\n" #" ; " + hex(offset) + "\n" - -        #find out if there's a two byte key like this -        temp_maybe = maybe_byte -        temp_maybe += ( rom[offset+1] << 8) -        if not is_data and temp_maybe in opt_table.keys() and rom[offset+1]!=0: -            opstr = opt_table[temp_maybe][0].lower() - -            if "x" in opstr: -                for x in range(0, opstr.count("x")): -                    insertion = rom[offset + 1] -                    insertion = "$" + hex(insertion)[2:] - -                    opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - -                    current_byte += 1 -                    offset += 1 -            if "?" in opstr: -                for y in range(0, opstr.count("?")): -                    byte1 = rom[offset + 1] -                    byte2 = rom[offset + 2] +def all_outstanding_labels_are_reverse(byte_labels, offset): +    for label_id in byte_labels.keys(): +        line = byte_labels[label_id] # label_id is also the address +        if line["definition"] == False: +            if not label_id < offset: return False +    return True + +class Disassembler(object): +    """ +    GBC disassembler +    """ + +    def __init__(self, config): +        """ +        Setup the class instance. +        """ +        self.config = config + +        self.wram = wram.WRAMProcessor(self.config) +        self.labels = labels.Labels(self.config) + +    def initialize(self): +        """ +        Setup the disassembler. +        """ +        self.wram.initialize() +        self.labels.initialize() + +        # TODO: fix how ROM is handled throughout the project. +        rom_path = os.path.join(self.config.path, "baserom.gbc") +        self.rom = bytearray(open(rom_path, "rb").read()) + +    def find_label(self, local_address, bank_id=0): +        # keep an integer +        if type(local_address) == str: +            local_address = int(local_address.replace("$", "0x"), 16) + +        if local_address < 0x8000: +            for label_entry in self.labels.labels: +                if get_local_address(label_entry["address"]) == local_address: +                    if label_entry["bank"] == bank_id or label_entry["bank"] == 0: +                        return label_entry["label"] +        if local_address in self.wram.wram_labels.keys(): +            return self.wram.wram_labels[local_address][-1] +        for constants in [self.wram.gbhw_constants, self.wram.hram_constants]: +            if local_address in constants.keys() and local_address >= 0xff00: +                return constants[local_address] +        return None + +    def find_address_from_label(self, label): +        for label_entry in self.labels.labels: +            if label == label_entry["label"]: +                return label_entry["address"] +        return None + +    def output_bank_opcodes(self, original_offset, max_byte_count=0x4000, include_last_address=True, stop_at=[], debug=False): +        """ +        Output bank opcodes. + +        fs = current_address +        b = bank_byte +        in = input_data  -- rom +        bank_size = byte_count +        i = offset +        ad = end_address +        a, oa = current_byte_number + +        stop_at can be used to supply a list of addresses to not disassemble +        over. This is useful if you know in advance that there are a lot of +        fall-throughs. +        """ + +        bank_id = original_offset / 0x4000 +        if debug: print "bank id is: " + str(bank_id) + +        last_hl_address = None #for when we're scanning the main map script +        last_a_address = None +        used_3d97 = False + +        rom = self.rom + +        offset = original_offset +        current_byte_number = 0 #start from the beginning + +        #we don't actually have an end address, but we'll just say $4000 +        end_address = original_offset + max_byte_count + +        byte_labels = {} +        data_tables = {} + +        first_loop = True +        output = "" +        keep_reading = True +        is_data = False +        while offset <= end_address and keep_reading: +            current_byte = rom[offset] +            maybe_byte = current_byte + +            # stop at any address +            if not first_loop and offset in stop_at: +                keep_reading = False +                break -                    number = byte1 -                    number += byte2 << 8; +            #first check if this byte already has a label +            #if it does, use the label +            #if not, generate a new label +            if offset in byte_labels.keys(): +                line_label = byte_labels[offset]["name"] +                byte_labels[offset]["usage"] += 1 +                output += "\n" +            else: +                line_label = asm_label(offset) +                byte_labels[offset] = {} +                byte_labels[offset]["name"] = line_label +                byte_labels[offset]["usage"] = 0 +            byte_labels[offset]["definition"] = True +            output += line_label + "\n" #" ; " + hex(offset) + "\n" -                    insertion = "$%.4x" % (number) +            #find out if there's a two byte key like this +            temp_maybe = maybe_byte +            temp_maybe += ( rom[offset+1] << 8) +            if not is_data and temp_maybe in opt_table.keys() and rom[offset+1]!=0: +                opstr = opt_table[temp_maybe][0].lower() -                    opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() +                if "x" in opstr: +                    for x in range(0, opstr.count("x")): +                        insertion = rom[offset + 1] +                        insertion = "$" + hex(insertion)[2:] -                    current_byte_number += 2 -                    offset += 2 +                        opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() -            output += spacing + opstr #+ " ; " + hex(offset) -            output += "\n" +                        current_byte += 1 +                        offset += 1 +                if "?" in opstr: +                    for y in range(0, opstr.count("?")): +                        byte1 = rom[offset + 1] +                        byte2 = rom[offset + 2] -            current_byte_number += 2 -            offset += 2 -        elif not is_data and maybe_byte in opt_table.keys(): -            op_code = opt_table[maybe_byte] -            op_code_type = op_code[1] -            op_code_byte = maybe_byte +                        number = byte1 +                        number += byte2 << 8; -            #type = -1 when it's the E op -            #if op_code_type != -1: -            if   op_code_type == 0 and rom[offset] == op_code_byte: -                op_str = op_code[0].lower() +                        insertion = "$%.4x" % (number) -                output += spacing + op_code[0].lower() #+ " ; " + hex(offset) +                        opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() + +                        current_byte_number += 2 +                        offset += 2 + +                output += spacing + opstr #+ " ; " + hex(offset)                  output += "\n" -                offset += 1 -                current_byte_number += 1 -            elif op_code_type == 1 and rom[offset] == op_code_byte: -                oplen = len(op_code[0]) -                opstr = copy(op_code[0]) -                xes = op_code[0].count("x") -                include_comment = False -                for x in range(0, xes): -                    insertion = rom[offset + 1] -                    insertion = "$" + hex(insertion)[2:] - -                    if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz -                        #generate a label for the byte we're jumping to -                        target_address = offset + 2 + c_int8(rom[offset + 1]).value -                        if target_address in byte_labels.keys(): -                            byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] -                            line_label2 = byte_labels[target_address]["name"] -                        else: -                            line_label2 = asm_label(target_address) -                            byte_labels[target_address] = {} -                            byte_labels[target_address]["name"] = line_label2 -                            byte_labels[target_address]["usage"] = 1 -                            byte_labels[target_address]["definition"] = False - -                        insertion = line_label2 -                        if has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset): -                            include_comment = True -                    elif current_byte == 0x3e: -                        last_a_address = rom[offset + 1] - -                    opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - -                    # because the $ff00+$ff syntax is silly -                    if opstr.count("$") > 1 and "+" in opstr: -                        first_orig = opstr[opstr.find("$"):opstr.find("+")] -                        first_val = eval(first_orig.replace("$","0x")) - -                        second_orig = opstr[opstr.find("+$")+1:opstr.find("]")] -                        second_val = eval(second_orig.replace("$","0x")) - -                        combined_val = "$%.4x" % (first_val + second_val) -                        result = find_label(combined_val, bank_id) -                        if result != None: -                            combined_val = result +                current_byte_number += 2 +                offset += 2 +            elif not is_data and maybe_byte in opt_table.keys(): +                op_code = opt_table[maybe_byte] +                op_code_type = op_code[1] +                op_code_byte = maybe_byte -                        replacetron = "[%s+%s]" % (first_orig, second_orig) -                        opstr = opstr.replace(replacetron, "[%s]" % combined_val) +                #type = -1 when it's the E op +                #if op_code_type != -1: +                if   op_code_type == 0 and rom[offset] == op_code_byte: +                    op_str = op_code[0].lower() -                    output += spacing + opstr -                    if include_comment: -                        output += " ; " + hex(offset) -                        if current_byte in relative_jumps: -                            output += " $" + hex(rom[offset + 1])[2:] +                    output += spacing + op_code[0].lower() #+ " ; " + hex(offset)                      output += "\n" -                    current_byte_number += 1                      offset += 1 -                    insertion = "" +                    current_byte_number += 1 +                elif op_code_type == 1 and rom[offset] == op_code_byte: +                    oplen = len(op_code[0]) +                    opstr = copy(op_code[0]) +                    xes = op_code[0].count("x") +                    include_comment = False +                    for x in range(0, xes): +                        insertion = rom[offset + 1] +                        insertion = "$" + hex(insertion)[2:] + +                        if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz +                            #generate a label for the byte we're jumping to +                            target_address = offset + 2 + c_int8(rom[offset + 1]).value +                            if target_address in byte_labels.keys(): +                                byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] +                                line_label2 = byte_labels[target_address]["name"] +                            else: +                                line_label2 = asm_label(target_address) +                                byte_labels[target_address] = {} +                                byte_labels[target_address]["name"] = line_label2 +                                byte_labels[target_address]["usage"] = 1 +                                byte_labels[target_address]["definition"] = False + +                            insertion = line_label2 +                            if has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset): +                                include_comment = True +                        elif current_byte == 0x3e: +                            last_a_address = rom[offset + 1] + +                        opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() + +                        # because the $ff00+$ff syntax is silly +                        if opstr.count("$") > 1 and "+" in opstr: +                            first_orig = opstr[opstr.find("$"):opstr.find("+")] +                            first_val = eval(first_orig.replace("$","0x")) + +                            second_orig = opstr[opstr.find("+$")+1:opstr.find("]")] +                            second_val = eval(second_orig.replace("$","0x")) + +                            combined_val = "$%.4x" % (first_val + second_val) +                            result = self.find_label(combined_val, bank_id) +                            if result != None: +                                combined_val = result + +                            replacetron = "[%s+%s]" % (first_orig, second_orig) +                            opstr = opstr.replace(replacetron, "[%s]" % combined_val) + +                        output += spacing + opstr +                        if include_comment: +                            output += " ; " + hex(offset) +                            if current_byte in relative_jumps: +                                output += " $" + hex(rom[offset + 1])[2:] +                        output += "\n" + +                        current_byte_number += 1 +                        offset += 1 +                        insertion = "" -                current_byte_number += 1 -                offset += 1 -                include_comment = False -            elif op_code_type == 2 and rom[offset] == op_code_byte: -                oplen = len(op_code[0]) -                opstr = copy(op_code[0]) -                qes = op_code[0].count("?") -                for x in range(0, qes): -                    byte1 = rom[offset + 1] -                    byte2 = rom[offset + 2] - -                    number = byte1 -                    number += byte2 << 8 - -                    if current_byte not in call_commands + discrete_jumps + relative_jumps: -                        pointer = get_global_address(number, bank_id) -                        if pointer not in data_tables.keys(): -                            data_tables[pointer] = {} -                            data_tables[pointer]['usage'] = 0 -                        else: -                            data_tables[pointer]['usage'] += 1 - -                    insertion = "$%.4x" % (number) -                    result = find_label(insertion, bank_id) -                    if result != None: -                        insertion = result - -                    opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() -                    output += spacing + opstr #+ " ; " + hex(offset) -                    output += "\n" +                    current_byte_number += 1 +                    offset += 1 +                    include_comment = False +                elif op_code_type == 2 and rom[offset] == op_code_byte: +                    oplen = len(op_code[0]) +                    opstr = copy(op_code[0]) +                    qes = op_code[0].count("?") +                    for x in range(0, qes): +                        byte1 = rom[offset + 1] +                        byte2 = rom[offset + 2] + +                        number = byte1 +                        number += byte2 << 8 + +                        if current_byte not in call_commands + discrete_jumps + relative_jumps: +                            pointer = get_global_address(number, bank_id) +                            if pointer not in data_tables.keys(): +                                data_tables[pointer] = {} +                                data_tables[pointer]['usage'] = 0 +                            else: +                                data_tables[pointer]['usage'] += 1 + +                        insertion = "$%.4x" % (number) +                        result = self.find_label(insertion, bank_id) +                        if result != None: +                            insertion = result -                    current_byte_number += 2 -                    offset += 2 +                        opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() +                        output += spacing + opstr #+ " ; " + hex(offset) +                        output += "\n" -                current_byte_number += 1 -                offset += 1 +                        current_byte_number += 2 +                        offset += 2 -                if current_byte == 0x21: -                    last_hl_address = byte1 + (byte2 << 8) -                if current_byte == 0xcd: -                    if number == 0x3d97: used_3d97 = True +                    current_byte_number += 1 +                    offset += 1 -                #duck out if this is jp $24d7 -                if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: -                    if current_byte == 0xc3: +                    if current_byte == 0x21: +                        last_hl_address = byte1 + (byte2 << 8) +                    if current_byte == 0xcd:                          if number == 0x3d97: used_3d97 = True -                    #if number == 0x24d7: #jp -                    if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): -                        keep_reading = False -                        is_data = False -                        break -            else: -                is_data = True -        else: -        #if is_data and keep_reading: -            output += spacing + "db $" + hex(rom[offset])[2:] #+ " ; " + hex(offset) -            output += "\n" -            offset += 1 -            current_byte_number += 1 -            if offset in byte_labels.keys(): -                is_data = False -                keep_reading = True -        #else the while loop would have spit out the opcode - -        #these two are done prior -        #offset += 1 -        #current_byte_number += 1 -        if not is_data and current_byte in relative_unconditional_jumps + end_08_scripts_with: -            #stop reading at a jump, relative jump or return -            if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): -                keep_reading = False -                is_data = False #cleanup -                break -            elif offset not in byte_labels.keys() and offset in data_tables.keys(): +                    #duck out if this is jp $24d7 +                    if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: +                        if current_byte == 0xc3: +                            if number == 0x3d97: used_3d97 = True +                        #if number == 0x24d7: #jp +                        if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): +                            keep_reading = False +                            is_data = False +                            break +                else: +                    is_data = True +            else: +            #if is_data and keep_reading: +                output += spacing + "db $" + hex(rom[offset])[2:] #+ " ; " + hex(offset) +                output += "\n" +                offset += 1 +                current_byte_number += 1 +                if offset in byte_labels.keys(): +                    is_data = False +                    keep_reading = True +            #else the while loop would have spit out the opcode + +            #these two are done prior +            #offset += 1 +            #current_byte_number += 1 + +            if not is_data and current_byte in relative_unconditional_jumps + end_08_scripts_with: +                #stop reading at a jump, relative jump or return +                if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): +                    keep_reading = False +                    is_data = False #cleanup +                    break +                elif offset not in byte_labels.keys() and offset in data_tables.keys(): +                    is_data = True +                    keep_reading = True +                else: +                    is_data = False +                    keep_reading = True +                output += "\n" +            elif is_data and offset not in byte_labels.keys():                  is_data = True                  keep_reading = True              else:                  is_data = False                  keep_reading = True -            output += "\n" -        elif is_data and offset not in byte_labels.keys(): -            is_data = True -            keep_reading = True -        else: -            is_data = False -            keep_reading = True - -        if offset in data_tables.keys(): -            output = output.replace('$%x' % (get_local_address(offset)), data_label(offset)) -            output += data_label(offset) + '\n' -            is_data = True -            keep_reading = True -        first_loop = False - -    #clean up unused labels -    for label_line in byte_labels.keys(): -        address = label_line -        label_line = byte_labels[label_line] -        if label_line["usage"] == 0: -            output = output.replace((label_line["name"] + "\n"), "") - -    #tone down excessive spacing -    output = output.replace("\n\n\n","\n\n") - -    #add the offset of the final location -    if include_last_address: -        output += "; " + hex(offset) - -    return (output, offset, last_hl_address, last_a_address, used_3d97) +            if offset in data_tables.keys(): +                output = output.replace('$%x' % (get_local_address(offset)), data_label(offset)) +                output += data_label(offset) + '\n' +                is_data = True +                keep_reading = True -def has_outstanding_labels(byte_labels): -    """ -    Check whether a label is used once in the asm output. +            first_loop = False -    If so, then that means it has to be called or specified later. -    """ -    for label_line in byte_labels.keys(): -        real_line = byte_labels[label_line] -        if real_line["definition"] == False: return True -    return False +        #clean up unused labels +        for label_line in byte_labels.keys(): +            address = label_line +            label_line = byte_labels[label_line] +            if label_line["usage"] == 0: +                output = output.replace((label_line["name"] + "\n"), "") -def all_outstanding_labels_are_reverse(byte_labels, offset): -    for label_id in byte_labels.keys(): -        line = byte_labels[label_id] # label_id is also the address -        if line["definition"] == False: -            if not label_id < offset: return False -    return True +        #tone down excessive spacing +        output = output.replace("\n\n\n","\n\n") +        #add the offset of the final location +        if include_last_address: +            output += "; " + hex(offset) +        return (output, offset, last_hl_address, last_a_address, used_3d97)  if __name__ == "__main__": -    load_labels() +    conf = config.Config() +    disasm = Disassembler(conf) +    disasm.initialize() +      addr = sys.argv[1]      if ":" in addr:          addr = addr.split(":")          addr = int(addr[0], 16)*0x4000+(int(addr[1], 16)%0x4000)      else: -        label_addr = find_address_from_label(addr) +        label_addr = disasm.find_address_from_label(addr)          if label_addr:              addr = label_addr          else:              addr = int(addr, 16) -    print output_bank_opcodes(addr)[0] + +    output = disasm.output_bank_opcodes(addr)[0] +    print output diff --git a/pokemontools/labels.py b/pokemontools/labels.py index ca411d1..2e50b89 100644 --- a/pokemontools/labels.py +++ b/pokemontools/labels.py @@ -3,7 +3,37 @@  Various label/line-related functions.  """ +import os +import json +  import pointers +import crystal + +class Labels(object): +    """ +    Store all labels. +    """ +    filename = "labels.json" + +    def __init__(self, config): +        """ +        Setup the instance. +        """ +        self.config = config +        self.path = os.path.join(self.config.path, Labels.filename) + +    def initialize(self): +        """ +        Handle anything requiring file-loading and such. +        """ +        if not os.path.exists(self.path): +            logging.info( +                "Running crystal.scan_for_predefined_labels to create \"{0}\". Trying.." +                .format(Labels.filename) +            ) +            crystal.scan_for_predefined_labels() + +        self.labels = json.read(open(self.path, "r").read())  def remove_quoted_text(line):      """get rid of content inside quotes diff --git a/pokemontools/preprocessor.py b/pokemontools/preprocessor.py index d9373ac..026da41 100644 --- a/pokemontools/preprocessor.py +++ b/pokemontools/preprocessor.py @@ -5,13 +5,8 @@ Basic preprocessor for both pokecrystal and pokered.  import sys -from crystal import ( -    DataByteWordMacro, -) - -default_macros = [ -    DataByteWordMacro, -] +import exceptions +import crystal  chars = {  "ガ": 0x05, @@ -278,16 +273,6 @@ chars = {  "9": 0xFF  } -class PreprocessorException(Exception): -    """ -    There was a problem in the preprocessor. -    """ - -class MacroException(PreprocessorException): -    """ -    There was a problem with a macro. -    """ -  def separate_comment(l):      """      Separates asm and comments on a single line. @@ -299,7 +284,10 @@ def separate_comment(l):                  break          if l[i] == "\"":              in_quotes = not in_quotes -    return l[:i], l[i:] or None +    return (l[:i], l[i:]) or None + +def make_macro_table(macros): +    return dict(((macro.macro_name, macro) for macro in macros))  def quote_translator(asm):      """ @@ -387,38 +375,7 @@ def quote_translator(asm):      return output -def extract_token(asm): -    return asm.split(" ")[0].strip() - -def make_macro_table(macros): -    return dict(((macro.macro_name, macro) for macro in macros)) - -def macro_test(asm, macro_table): -    """ -    Returns a matching macro, or None/False. -    """ -    # macros are determined by the first symbol on the line -    token = extract_token(asm) - -    # skip db and dw since rgbasm handles those and they aren't macros -    if token is not None and token not in ["db", "dw"] and token in macro_table: -        return (macro_table[token], token) -    else: -        return (None, None) - -def is_based_on(something, base): -    """ -    Checks whether or not 'something' is a class that is a subclass of a class -    by name. This is a terrible hack but it removes a direct dependency on -    existing macros. - -    Used by macro_translator. -    """ -    options = [str(klass.__name__) for klass in something.__bases__] -    options += [something.__name__] -    return (base in options) - -def check_macro_sanity(params, macro, original_line): +def check_macro_sanity(self, params, macro, original_line):      """      Checks whether or not the correct number of arguments are being passed to a      certain macro. There are a number of possibilities based on the types of @@ -441,12 +398,12 @@ def check_macro_sanity(params, macro, original_line):              elif param_klass.size == 3:                  allowed_length += 2 # bank and label              else: -                raise MacroException( +                raise exceptions.MacroException(                      "dunno what to do with a macro param with a size > 3 (size={size})"                      .format(size=param_klass.size)                  )          else: -            raise MacroException( +            raise exceptions.MacroException(                  "dunno what to do with this non db/dw macro param: {klass} in line {line}"                  .format(klass=param_klass, line=original_line)              ) @@ -461,7 +418,7 @@ def check_macro_sanity(params, macro, original_line):      params_len = len(params)      if params_len not in allowed_lengths: -        raise PreprocessorException( +        raise exceptions.PreprocessorException(              "mismatched number of parameters ({count}, instead of any of {allowed}) on this line: {line}"              .format(                  count=params_len, @@ -472,170 +429,223 @@ def check_macro_sanity(params, macro, original_line):      return True -def macro_translator(macro, token, line, show_original_lines=False, do_macro_sanity_check=False): -    """ -    Converts a line with a macro into a rgbasm-compatible line. +def extract_token(asm): +    return asm.split(" ")[0].strip() -    @param show_original_lines: show lines before preprocessing in stdout -    @param do_macro_sanity_check: helpful for debugging macros +def is_based_on(something, base):      """ -    if macro.macro_name != token: -        raise MacroException("macro/token mismatch") - -    original_line = line - -    # remove trailing newline -    if line[-1] == "\n": -        line = line[:-1] -    else: -        original_line += "\n" - -    # remove first tab -    has_tab = False -    if line[0] == "\t": -        has_tab = True -        line = line[1:] - -    # remove duplicate whitespace (also trailing) -    line = " ".join(line.split()) - -    params = [] - -    # check if the line has params -    if " " in line: -        # split the line into separate parameters -        params = line.replace(token, "").split(",") - -        # check if there are no params (redundant) -        if len(params) == 1 and params[0] == "": -            raise MacroException("macro has no params?") - -    # write out a comment showing the original line -    if show_original_lines: -        sys.stdout.write("; original_line: " + original_line) - -    # rgbasm can handle "db" so no preprocessing is required, plus this wont be -    # reached because of earlier checks in macro_test. -    if macro.macro_name in ["db", "dw"]: -        sys.stdout.write(original_line) -        return - -    # certain macros don't need an initial byte written -    # do: all scripting macros -    # don't: signpost, warp_def, person_event, xy_trigger -    if not macro.override_byte_check: -        sys.stdout.write("db ${0:02X}\n".format(macro.id)) - -    # Does the number of parameters on this line match any allowed number of -    # parameters that the macro expects? -    if do_macro_sanity_check: -        check_macro_sanity(params, macro, original_line) - -    # used for storetext -    correction = 0 - -    output = "" - -    index = 0 -    while index < len(params): -        param_type  = macro.param_types[index - correction] -        description = param_type["name"] -        param_klass = param_type["class"] -        byte_type   = param_klass.byte_type # db or dw -        size        = param_klass.size -        param       = params[index].strip() - -        # param_klass.to_asm() won't work here because it doesn't -        # include db/dw. - -        # some parameters are really multiple types of bytes -        if (byte_type == "dw" and size != 2) or \ -           (byte_type == "db" and size != 1): - -            output += ("; " + description + "\n") - -            if   size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"): -                # write the bank first -                output += ("db " + param + "\n") -                # write the pointer second -                output += ("dw " + params[index+1].strip() + "\n") -                index += 2 -                correction += 1 -            elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"): -                # write the pointer first -                output += ("dw " + param + "\n") -                # write the bank second -                output += ("db " + params[index+1].strip() + "\n") -                index += 2 -                correction += 1 -            elif size == 3 and "from_asm" in dir(param_klass): -                output += ("db " + param_klass.from_asm(param) + "\n") -                index += 1 -            else: -                raise MacroException( -                    "dunno what to do with this macro param ({klass}) in line: {line}" -                    .format( -                        klass=param_klass, -                        line=original_line, -                    ) -                ) +    Checks whether or not 'something' is a class that is a subclass of a class +    by name. This is a terrible hack but it removes a direct dependency on +    existing macros. -        # or just print out the byte -        else: -            output += (byte_type + " " + param + " ; " + description + "\n") +    Used by macro_translator. +    """ +    options = [str(klass.__name__) for klass in something.__bases__] +    options += [something.__name__] +    return (base in options) -            index += 1 +class Preprocessor(object): +    """ +    A wrapper around the actual preprocessing step. Because rgbasm can't handle +    many of these macros. +    """ -    sys.stdout.write(output) +    default_macros = [ +        crystal.DataByteWordMacro, +    ] + +    def __init__(self, config, macros=None): +        """ +        Setup the preprocessor. +        """ +        self.config = config + +        if macros == None: +            macros = Preprocessor.default_macros + +        self.macros = macros +        self.macro_table = make_macro_table(self.macros) + +    def preprocess(self, lines=None): +        """ +        Run the preprocessor against stdin. +        """ +        if not lines: +            # read each line from stdin +            lines = (sys.stdin.readlines()) +        elif not isinstance(lines, list): +            # split up the input into individual lines +            lines = lines.split("\n") + +        for l in lines: +            self.read_line(l) + +    def read_line(self, l): +        """ +        Preprocesses a given line of asm. +        """ + +        if l in ["\n", ""] or l[0] == ";": +            sys.stdout.write(l) +            return # jump out early + +        # strip comments from asm +        asm, comment = separate_comment(l) + +        # export all labels +        if ':' in asm[:asm.find('"')] and "macro" not in asm.lower(): +            sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n') + +        # expect preprocessed .asm files +        if "INCLUDE" in asm: +            asm = asm.replace('.asm','.tx') +            sys.stdout.write(asm) -def read_line(l, macro_table): -    """Preprocesses a given line of asm.""" +        # ascii string macro preserves the bytes as ascii (skip the translator) +        elif len(asm) > 6 and ("ascii " == asm[:6] or "\tascii " == asm[:7]): +            asm = asm.replace("ascii", "db", 1) +            sys.stdout.write(asm) -    if l in ["\n", ""] or l[0] == ";": -        sys.stdout.write(l) -        return # jump out early +        # convert text to bytes when a quote appears (not in a comment) +        elif "\"" in asm: +            sys.stdout.write(quote_translator(asm)) -    # strip comments from asm -    asm, comment = separate_comment(l) +        # check against other preprocessor features +        else: +            macro, token = self.macro_test(asm) +            if macro: +                self.macro_translator(macro, token, asm) +            else: +                sys.stdout.write(asm) -    # export all labels -    if ':' in asm[:asm.find('"')] and "macro" not in asm.lower(): -        sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n') +        if comment: +            sys.stdout.write(comment) -    # expect preprocessed .asm files -    if "INCLUDE" in asm: -        asm = asm.replace('.asm','.tx') -        sys.stdout.write(asm) +    def macro_translator(self, macro, token, line, show_original_lines=False, do_macro_sanity_check=False): +        """ +        Converts a line with a macro into a rgbasm-compatible line. -    # ascii string macro preserves the bytes as ascii (skip the translator) -    elif len(asm) > 6 and ("ascii " == asm[:6] or "\tascii " == asm[:7]): -        asm = asm.replace("ascii", "db", 1) -        sys.stdout.write(asm) +        @param show_original_lines: show lines before preprocessing in stdout +        @param do_macro_sanity_check: helpful for debugging macros +        """ +        if macro.macro_name != token: +            raise exceptions.MacroException("macro/token mismatch") -    # convert text to bytes when a quote appears (not in a comment) -    elif "\"" in asm: -        sys.stdout.write(quote_translator(asm)) +        original_line = line -    # check against other preprocessor features -    else: -        macro, token = macro_test(asm, macro_table) -        if macro: -            macro_translator(macro, token, asm) +        # remove trailing newline +        if line[-1] == "\n": +            line = line[:-1]          else: -            sys.stdout.write(asm) +            original_line += "\n" + +        # remove first tab +        has_tab = False +        if line[0] == "\t": +            has_tab = True +            line = line[1:] + +        # remove duplicate whitespace (also trailing) +        line = " ".join(line.split()) + +        params = [] + +        # check if the line has params +        if " " in line: +            # split the line into separate parameters +            params = line.replace(token, "").split(",") + +            # check if there are no params (redundant) +            if len(params) == 1 and params[0] == "": +                raise exceptions.MacroException("macro has no params?") + +        # write out a comment showing the original line +        if show_original_lines: +            sys.stdout.write("; original_line: " + original_line) + +        # rgbasm can handle "db" so no preprocessing is required, plus this wont be +        # reached because of earlier checks in macro_test. +        if macro.macro_name in ["db", "dw"]: +            sys.stdout.write(original_line) +            return + +        # certain macros don't need an initial byte written +        # do: all scripting macros +        # don't: signpost, warp_def, person_event, xy_trigger +        if not macro.override_byte_check: +            sys.stdout.write("db ${0:02X}\n".format(macro.id)) + +        # Does the number of parameters on this line match any allowed number of +        # parameters that the macro expects? +        if do_macro_sanity_check: +            self.check_macro_sanity(params, macro, original_line) + +        # used for storetext +        correction = 0 + +        output = "" + +        index = 0 +        while index < len(params): +            param_type  = macro.param_types[index - correction] +            description = param_type["name"] +            param_klass = param_type["class"] +            byte_type   = param_klass.byte_type # db or dw +            size        = param_klass.size +            param       = params[index].strip() + +            # param_klass.to_asm() won't work here because it doesn't +            # include db/dw. + +            # some parameters are really multiple types of bytes +            if (byte_type == "dw" and size != 2) or \ +               (byte_type == "db" and size != 1): + +                output += ("; " + description + "\n") + +                if   size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"): +                    # write the bank first +                    output += ("db " + param + "\n") +                    # write the pointer second +                    output += ("dw " + params[index+1].strip() + "\n") +                    index += 2 +                    correction += 1 +                elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"): +                    # write the pointer first +                    output += ("dw " + param + "\n") +                    # write the bank second +                    output += ("db " + params[index+1].strip() + "\n") +                    index += 2 +                    correction += 1 +                elif size == 3 and "from_asm" in dir(param_klass): +                    output += ("db " + param_klass.from_asm(param) + "\n") +                    index += 1 +                else: +                    raise exceptions.MacroException( +                        "dunno what to do with this macro param ({klass}) in line: {line}" +                        .format( +                            klass=param_klass, +                            line=original_line, +                        ) +                    ) -    if comment: -        sys.stdout.write(comment) +            # or just print out the byte +            else: +                output += (byte_type + " " + param + " ; " + description + "\n") + +                index += 1 -def preprocess(macro_table, lines=None): -    """Main entry point for the preprocessor.""" +        sys.stdout.write(output) -    if not lines: -        # read each line from stdin -        lines = (sys.stdin.readlines()) -    elif not isinstance(lines, list): -        # split up the input into individual lines -        lines = lines.split("\n") +    def macro_test(self, asm): +        """ +        Returns a matching macro, or None/False. +        """ +        # macros are determined by the first symbol on the line +        token = extract_token(asm) -    for l in lines: -        read_line(l, macro_table) +        # skip db and dw since rgbasm handles those and they aren't macros +        if token is not None and token not in ["db", "dw"] and token in self.macro_table: +            return (self.macro_table[token], token) +        else: +            return (None, None) diff --git a/pokemontools/wram.py b/pokemontools/wram.py index 5a5fa75..60001aa 100644 --- a/pokemontools/wram.py +++ b/pokemontools/wram.py @@ -4,7 +4,15 @@ RGBDS BSS section and constant parsing.  """  import os -path = os.path.dirname(os.path.abspath(__file__)) + +def make_wram_labels(wram_sections): +    wram_labels = {} +    for section in wram_sections: +        for label in section['labels']: +            if label['address'] not in wram_labels.keys(): +                wram_labels[label['address']] = [] +            wram_labels[label['address']] += [label['label']] +    return wram_labels  def read_bss_sections(bss):      sections = [] @@ -55,34 +63,6 @@ def read_bss_sections(bss):      sections.append(section)      return sections -def read_wram_sections(): -    """ -    Opens the wram file and calls read_bss_sections. -    """ -    wram_content = None -    wram_file_path = os.path.join(os.path.dirname(path), 'wram.asm') -    try: -        wram_file_handler = open(wram_file_path, 'r') -    except IOError as exception: -        wram_content = [""] -    else: -        wram_content = wram_file_handler.readlines() -    wram_sections = read_bss_sections(wram_content) -    return wram_sections - -wram_sections = read_wram_sections() - -def make_wram_labels(wram_sections): -    wram_labels = {} -    for section in wram_sections: -        for label in section['labels']: -            if label['address'] not in wram_labels.keys(): -                wram_labels[label['address']] = [] -            wram_labels[label['address']] += [label['label']] -    return wram_labels - -wram_labels = make_wram_labels(wram_sections) -  def constants_to_dict(constants):      return dict((eval(constant[constant.find('EQU')+3:constant.find(';')].replace('$','0x')), constant[:constant.find('EQU')].strip()) for constant in constants) @@ -95,31 +75,90 @@ def read_constants(filepath):      """      Load lines from a file and call scrape_constants.      """ -    try: -        file_handler = open(filepath, "r") -    except IOError as exception: -        lines = [""] -    else: +    lines = None + +    with open(filepath, "r") as file_handler:          lines = file_handler.readlines() +      constants = scrape_constants(lines)      return constants -def read_hram_constants(): -    """ -    Load constants from hram.asm. -    """ -    hram_path = os.path.join(os.path.dirname(path), 'hram.asm') -    return read_constants(hram_path) - -# TODO: get rid of this global -hram_constants = read_hram_constants() - -def read_gbhw_constants(): +class WRAMProcessor(object):      """ -    Load constants from gbhw.asm. +    RGBDS BSS section and constant parsing.      """ -    gbhw_path = os.path.join(os.path.dirname(path), 'gbhw.asm') -    return read_constants(gbhw_path) -# TODO: get rid of this global -gbhw_constants = read_gbhw_constants() +    def __init__(self, config): +        """ +        Setup for WRAM parsing. +        """ +        self.config = config + +        self.paths = {} +        self.paths["wram"] = os.path.join(self.config.path, "wram.asm") +        self.paths["hram"] = os.path.join(self.config.path, "hram.asm") +        self.paths["gbhw"] = os.path.join(self.config.path, "gbhw.asm") + +    def initialize(self): +        """ +        Read constants. +        """ +        self.setup_wram_sections() +        self.setup_wram_labels() +        self.setup_hram_constants() +        self.setup_gbhw_constants() + +    def read_wram_sections(self): +        """ +        Opens the wram file and calls read_bss_sections. +        """ +        wram_content = None +        wram_file_path = self.paths["wram"] + +        with open(wram_file_path, "r") as wram: +            wram_content = wram.readlines() + +        wram_sections = read_bss_sections(wram_content) +        return wram_sections + +    def setup_wram_sections(self): +        """ +        Call read_wram_sections and set a variable. +        """ +        self.wram_sections = self.read_wram_sections() +        return self.wram_sections + +    def setup_wram_labels(self): +        """ +        Make wram labels based on self.wram_sections as input. +        """ +        self.wram_labels = make_wram_labels(self.wram_sections) +        return self.wram_labels + +    def read_hram_constants(self): +        """ +        Read constants from hram.asm using read_constants. +        """ +        hram_constants = read_constants(self.paths["hram"]) +        return hram_constants + +    def setup_hram_constants(self): +        """ +        Call read_hram_constants and set a variable. +        """ +        self.hram_constants = self.read_hram_constants() +        return self.hram_constants + +    def read_gbhw_constants(self): +        """ +        Read constants from gbhw.asm using read_constants. +        """ +        gbhw_constants = read_constants(self.paths["gbhw"]) +        return gbhw_constants + +    def setup_gbhw_constants(self): +        """ +        Call read_gbhw_constants and set a variable. +        """ +        self.gbhw_constants = self.read_gbhw_constants() +        return self.gbhw_constants | 
