Merge pull request #10 from kanzure/config

Configuration for paths
author: Bryan Bishop <kanzure@gmail.com> 2013-09-04 20:38:11 -0700
committer: Bryan Bishop <kanzure@gmail.com> 2013-09-04 20:38:11 -0700
commit: ee05e2fe1d03e0e68c64cea09ec41ab70e12bc3a (patch)
tree: db3b846aa6c92acd4cbf6f4bc0f7e5fb566ade27
parent: c2712bb90f09083f0bfa786750be2a9b34105fa9 (diff)
parent: 37441a35b13f3421ba0c0f234e2ee4bbc5db4b63 (diff)
7 files changed, 719 insertions, 576 deletions
diff --git a/pokemontools/__init__.py b/pokemontools/__init__.py
index 8fb8b19..09331af 100644
--- a/pokemontools/__init__.py
+++ b/pokemontools/__init__.py
@@ -1,2 +1,3 @@
+import config
 import crystal
 import preprocessor
diff --git a/pokemontools/config.py b/pokemontools/config.py
new file mode 100644
index 0000000..656fab0
--- /dev/null
+++ b/pokemontools/config.py
@@ -0,0 +1,46 @@
+"""
+Configuration
+"""
+
+import os
+
+import exceptions
+
+class Config(object):
+    """
+    Hold every pokemontools setting passed as a keyword argument and expose
+    each one as an attribute (e.g. conf.path). "path" falls back to the
+    current working directory when the caller does not supply it.
+    """
+
+    def __init__(self, **kwargs):
+        """
+        Store all keyword arguments; raise ConfigException on a key conflict.
+        """
+        self._config = {}
+
+        for (key, value) in kwargs.items():
+            if key in self.__dict__:
+                # only instance attributes (at this point "_config") conflict
+                raise exceptions.ConfigException(
+                    "Can't store \"{0}\" in configuration because the key conflicts with an existing property."
+                    .format(key)
+                )
+            self._config[key] = value
+
+        if "path" not in self._config:
+            self._config["path"] = os.getcwd()
+
+    def __getattr__(self, key):
+        """
+        Called only after normal attribute lookup fails: return the stored
+        value for key, or raise ConfigException when it was never configured.
+        """
+        # Go through __dict__: touching self._config on an instance that has no
+        # _config yet (created without __init__) would re-enter __getattr__.
+        settings = self.__dict__.get("_config", {})
+        if key in settings:
+            return settings[key]
+        raise exceptions.ConfigException(
+            "no config found for \"{0}\"".format(key)
+        )
diff --git a/pokemontools/exceptions.py b/pokemontools/exceptions.py
index 71d0da2..4de62eb 100644
--- a/pokemontools/exceptions.py
+++ b/pokemontools/exceptions.py
@@ -11,3 +11,18 @@ class TextScriptException(Exception):
     """
     TextScript encountered an inconsistency or problem.
     """
+
+class ConfigException(Exception):
+    """
+    Configuration error, e.g. a missing config variable or a conflicting key.
+    """
+
+class PreprocessorException(Exception):
+    """
+    There was a problem in the preprocessor. Base class of MacroException.
+    """
+
+class MacroException(PreprocessorException):
+    """
+    There was a problem with a macro (a more specific PreprocessorException).
+    """
diff --git a/pokemontools/gbz80disasm.py b/pokemontools/gbz80disasm.py
index 7499982..790388e 100644
--- a/pokemontools/gbz80disasm.py
+++ b/pokemontools/gbz80disasm.py
@@ -1,4 +1,7 @@
 # -*- coding: utf-8 -*-
+"""
+GBC disassembler
+"""
 
 import os
 import sys
@@ -7,22 +10,15 @@ from ctypes import c_int8
 import random
 import json
 
-from wram import *
+import config
+import crystal
+import labels
+import wram
 
 # New versions of json don't have read anymore.
 if not hasattr(json, "read"):
     json.read = json.loads
 
-def load_rom(filename="../baserom.gbc"):
-    """
-    Load the specified rom.
-
-    If no rom is given, load "../baserom.gbc".
-    """
-    global rom
-    rom = bytearray(open(filename,'rb').read())
-    return rom
-
 spacing = "\t"
 
 temp_opt_table = [
@@ -563,49 +559,6 @@ relative_unconditional_jumps = [0xc3, 0x18]
 
 call_commands = [0xdc, 0xd4, 0xc4, 0xcc, 0xcd]
 
-all_labels = {}
-def load_labels(filename="labels.json"):
-    """
-    Load labels from specified file.
-
-    If no filename is given, loads 'labels.json'.
-    """
-    global all_labels
-
-    # don't re-load labels each time
-    if all_labels != {}:
-        return
-
-    if os.path.exists(filename):
-        all_labels = json.read(open(filename, "r").read())
-    else:
-        print "You must run crystal.scan_for_predefined_labels() to create \"labels.json\". Trying..."
-        import crystal
-        crystal.scan_for_predefined_labels()
-
-def find_label(local_address, bank_id=0):
-    # keep an integer
-    if type(local_address) == str:
-        local_address = int(local_address.replace("$", "0x"), 16)
-
-    if local_address < 0x8000:
-        for label_entry in all_labels:
-            if get_local_address(label_entry["address"]) == local_address:
-                if label_entry["bank"] == bank_id or label_entry["bank"] == 0:
-                    return label_entry["label"]
-    if local_address in wram_labels.keys():
-        return wram_labels[local_address][-1]
-    for constants in [gbhw_constants, hram_constants]:
-        if local_address in constants.keys() and local_address >= 0xff00:
-            return constants[local_address]
-    return None
-
-def find_address_from_label(label):
-    for label_entry in all_labels:
-        if label == label_entry["label"]:
-            return label_entry["address"]
-    return None
-
 def asm_label(address):
     """
     Return the ASM label using the address.
@@ -627,320 +580,369 @@ def get_global_address(address, bank):
 
     return ".ASM_" + hex(address)[2:]
 
-def output_bank_opcodes(original_offset, max_byte_count=0x4000, include_last_address=True, stop_at=[], debug=False):
+def has_outstanding_labels(byte_labels):
     """
-    Output bank opcodes.
-
-    fs = current_address
-    b = bank_byte
-    in = input_data  -- rom
-    bank_size = byte_count
-    i = offset
-    ad = end_address
-    a, oa = current_byte_number
-
-    stop_at can be used to supply a list of addresses to not disassemble
-    over. This is useful if you know in advance that there are a lot of
-    fall-throughs.
+    Check whether a label is used once in the asm output.
+
+    If so, then that means it has to be called or specified later.
     """
+    for label_line in byte_labels.keys():
+        real_line = byte_labels[label_line]
+        if real_line["definition"] == False: return True
+    return False
 
-    load_labels()
-    load_rom()
-
-    bank_id = original_offset / 0x4000
-    if debug: print "bank id is: " + str(bank_id)
-
-    last_hl_address = None #for when we're scanning the main map script
-    last_a_address = None
-    used_3d97 = False
-
-    global rom
-    offset = original_offset
-    current_byte_number = 0 #start from the beginning
-
-    #we don't actually have an end address, but we'll just say $4000
-    end_address = original_offset + max_byte_count
-
-    byte_labels = {}
-    data_tables = {}
-
-    first_loop = True
-    output = ""
-    keep_reading = True
-    is_data = False
-    while offset <= end_address and keep_reading:
-        current_byte = rom[offset]
-        maybe_byte = current_byte
-
-        # stop at any address
-        if not first_loop and offset in stop_at:
-            keep_reading = False
-            break
-
-        #first check if this byte already has a label
-        #if it does, use the label
-        #if not, generate a new label
-        if offset in byte_labels.keys():
-            line_label = byte_labels[offset]["name"]
-            byte_labels[offset]["usage"] += 1
-            output += "\n"
-        else:
-            line_label = asm_label(offset)
-            byte_labels[offset] = {}
-            byte_labels[offset]["name"] = line_label
-            byte_labels[offset]["usage"] = 0
-        byte_labels[offset]["definition"] = True
-        output += line_label + "\n" #" ; " + hex(offset) + "\n"
-
-        #find out if there's a two byte key like this
-        temp_maybe = maybe_byte
-        temp_maybe += ( rom[offset+1] << 8)
-        if not is_data and temp_maybe in opt_table.keys() and rom[offset+1]!=0:
-            opstr = opt_table[temp_maybe][0].lower()
-
-            if "x" in opstr:
-                for x in range(0, opstr.count("x")):
-                    insertion = rom[offset + 1]
-                    insertion = "$" + hex(insertion)[2:]
-
-                    opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
-
-                    current_byte += 1
-                    offset += 1
-            if "?" in opstr:
-                for y in range(0, opstr.count("?")):
-                    byte1 = rom[offset + 1]
-                    byte2 = rom[offset + 2]
+def all_outstanding_labels_are_reverse(byte_labels, offset):
+    """Return True if every label that is not yet defined has an address below offset."""
+    for address, entry in byte_labels.items():  # each key is the label's address
+        if entry["definition"] == False and not address < offset:
+            return False
+    return True
+
+class Disassembler(object):
+    """
+    GBC disassembler
+    """
+
+    def __init__(self, config):
+        """
+        Keep config and build the WRAMProcessor and Labels helpers from it.
+        """
+        self.config = config
+
+        self.wram = wram.WRAMProcessor(self.config)
+        self.labels = labels.Labels(self.config)
+
+    def initialize(self):
+        """
+        Initialize wram and labels, then read <config.path>/baserom.gbc into self.rom.
+        """
+        self.wram.initialize()
+        self.labels.initialize()
+
+        # TODO: fix how ROM is handled throughout the project.
+        with open(os.path.join(self.config.path, "baserom.gbc"), "rb") as rom_file:
+            self.rom = bytearray(rom_file.read())  # handle is closed on exit
+
+    def find_label(self, local_address, bank_id=0):
+        """Return a label for local_address (an int or a "$xxxx" string), or None."""
+        if type(local_address) is str:
+            local_address = int(local_address.replace("$", "0x"), 16)
+        if local_address < 0x8000:
+            for entry in self.labels.labels:
+                if get_local_address(entry["address"]) != local_address:
+                    continue
+                if entry["bank"] == bank_id or entry["bank"] == 0:
+                    return entry["label"]
+        if local_address in self.wram.wram_labels:
+            return self.wram.wram_labels[local_address][-1]
+        for table in (self.wram.gbhw_constants, self.wram.hram_constants):
+            if local_address >= 0xff00 and local_address in table:
+                return table[local_address]
+        return None
+
+    def find_address_from_label(self, label):
+        """Return the "address" of the first entry whose "label" equals label, else None."""
+        entries = self.labels.labels
+        hits = (entry["address"] for entry in entries if label == entry["label"])
+        return next(hits, None)
+
+    def output_bank_opcodes(self, original_offset, max_byte_count=0x4000, include_last_address=True, stop_at=[], debug=False):
+        """
+        Output bank opcodes.
+
+        fs = current_address
+        b = bank_byte
+        in = input_data  -- rom
+        bank_size = byte_count
+        i = offset
+        ad = end_address
+        a, oa = current_byte_number
+
+        stop_at can be used to supply a list of addresses to not disassemble
+        over. This is useful if you know in advance that there are a lot of
+        fall-throughs.
+        """
+
+        bank_id = original_offset / 0x4000
+        if debug: print "bank id is: " + str(bank_id)
+
+        last_hl_address = None #for when we're scanning the main map script
+        last_a_address = None
+        used_3d97 = False
+
+        rom = self.rom
+
+        offset = original_offset
+        current_byte_number = 0 #start from the beginning
+
+        #we don't actually have an end address, but we'll just say $4000
+        end_address = original_offset + max_byte_count
+
+        byte_labels = {}
+        data_tables = {}
+
+        first_loop = True
+        output = ""
+        keep_reading = True
+        is_data = False
+        while offset <= end_address and keep_reading:
+            current_byte = rom[offset]
+            maybe_byte = current_byte
+
+            # stop at any address
+            if not first_loop and offset in stop_at:
+                keep_reading = False
+                break
 
-                    number = byte1
-                    number += byte2 << 8;
+            #first check if this byte already has a label
+            #if it does, use the label
+            #if not, generate a new label
+            if offset in byte_labels.keys():
+                line_label = byte_labels[offset]["name"]
+                byte_labels[offset]["usage"] += 1
+                output += "\n"
+            else:
+                line_label = asm_label(offset)
+                byte_labels[offset] = {}
+                byte_labels[offset]["name"] = line_label
+                byte_labels[offset]["usage"] = 0
+            byte_labels[offset]["definition"] = True
+            output += line_label + "\n" #" ; " + hex(offset) + "\n"
 
-                    insertion = "$%.4x" % (number)
+            #find out if there's a two byte key like this
+            temp_maybe = maybe_byte
+            temp_maybe += ( rom[offset+1] << 8)
+            if not is_data and temp_maybe in opt_table.keys() and rom[offset+1]!=0:
+                opstr = opt_table[temp_maybe][0].lower()
 
-                    opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
+                if "x" in opstr:
+                    for x in range(0, opstr.count("x")):
+                        insertion = rom[offset + 1]
+                        insertion = "$" + hex(insertion)[2:]
 
-                    current_byte_number += 2
-                    offset += 2
+                        opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
 
-            output += spacing + opstr #+ " ; " + hex(offset)
-            output += "\n"
+                        current_byte += 1
+                        offset += 1
+                if "?" in opstr:
+                    for y in range(0, opstr.count("?")):
+                        byte1 = rom[offset + 1]
+                        byte2 = rom[offset + 2]
 
-            current_byte_number += 2
-            offset += 2
-        elif not is_data and maybe_byte in opt_table.keys():
-            op_code = opt_table[maybe_byte]
-            op_code_type = op_code[1]
-            op_code_byte = maybe_byte
+                        number = byte1
+                        number += byte2 << 8;
 
-            #type = -1 when it's the E op
-            #if op_code_type != -1:
-            if   op_code_type == 0 and rom[offset] == op_code_byte:
-                op_str = op_code[0].lower()
+                        insertion = "$%.4x" % (number)
 
-                output += spacing + op_code[0].lower() #+ " ; " + hex(offset)
+                        opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
+
+                        current_byte_number += 2
+                        offset += 2
+
+                output += spacing + opstr #+ " ; " + hex(offset)
                 output += "\n"
 
-                offset += 1
-                current_byte_number += 1
-            elif op_code_type == 1 and rom[offset] == op_code_byte:
-                oplen = len(op_code[0])
-                opstr = copy(op_code[0])
-                xes = op_code[0].count("x")
-                include_comment = False
-                for x in range(0, xes):
-                    insertion = rom[offset + 1]
-                    insertion = "$" + hex(insertion)[2:]
-
-                    if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz
-                        #generate a label for the byte we're jumping to
-                        target_address = offset + 2 + c_int8(rom[offset + 1]).value
-                        if target_address in byte_labels.keys():
-                            byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
-                            line_label2 = byte_labels[target_address]["name"]
-                        else:
-                            line_label2 = asm_label(target_address)
-                            byte_labels[target_address] = {}
-                            byte_labels[target_address]["name"] = line_label2
-                            byte_labels[target_address]["usage"] = 1
-                            byte_labels[target_address]["definition"] = False
-
-                        insertion = line_label2
-                        if has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset):
-                            include_comment = True
-                    elif current_byte == 0x3e:
-                        last_a_address = rom[offset + 1]
-
-                    opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
-
-                    # because the $ff00+$ff syntax is silly
-                    if opstr.count("$") > 1 and "+" in opstr:
-                        first_orig = opstr[opstr.find("$"):opstr.find("+")]
-                        first_val = eval(first_orig.replace("$","0x"))
-
-                        second_orig = opstr[opstr.find("+$")+1:opstr.find("]")]
-                        second_val = eval(second_orig.replace("$","0x"))
-
-                        combined_val = "$%.4x" % (first_val + second_val)
-                        result = find_label(combined_val, bank_id)
-                        if result != None:
-                            combined_val = result
+                current_byte_number += 2
+                offset += 2
+            elif not is_data and maybe_byte in opt_table.keys():
+                op_code = opt_table[maybe_byte]
+                op_code_type = op_code[1]
+                op_code_byte = maybe_byte
 
-                        replacetron = "[%s+%s]" % (first_orig, second_orig)
-                        opstr = opstr.replace(replacetron, "[%s]" % combined_val)
+                #type = -1 when it's the E op
+                #if op_code_type != -1:
+                if   op_code_type == 0 and rom[offset] == op_code_byte:
+                    op_str = op_code[0].lower()
 
-                    output += spacing + opstr
-                    if include_comment:
-                        output += " ; " + hex(offset)
-                        if current_byte in relative_jumps:
-                            output += " $" + hex(rom[offset + 1])[2:]
+                    output += spacing + op_code[0].lower() #+ " ; " + hex(offset)
                     output += "\n"
 
-                    current_byte_number += 1
                     offset += 1
-                    insertion = ""
+                    current_byte_number += 1
+                elif op_code_type == 1 and rom[offset] == op_code_byte:
+                    oplen = len(op_code[0])
+                    opstr = copy(op_code[0])
+                    xes = op_code[0].count("x")
+                    include_comment = False
+                    for x in range(0, xes):
+                        insertion = rom[offset + 1]
+                        insertion = "$" + hex(insertion)[2:]
+
+                        if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz
+                            #generate a label for the byte we're jumping to
+                            target_address = offset + 2 + c_int8(rom[offset + 1]).value
+                            if target_address in byte_labels.keys():
+                                byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
+                                line_label2 = byte_labels[target_address]["name"]
+                            else:
+                                line_label2 = asm_label(target_address)
+                                byte_labels[target_address] = {}
+                                byte_labels[target_address]["name"] = line_label2
+                                byte_labels[target_address]["usage"] = 1
+                                byte_labels[target_address]["definition"] = False
+
+                            insertion = line_label2
+                            if has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset):
+                                include_comment = True
+                        elif current_byte == 0x3e:
+                            last_a_address = rom[offset + 1]
+
+                        opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
+
+                        # because the $ff00+$ff syntax is silly
+                        if opstr.count("$") > 1 and "+" in opstr:
+                            first_orig = opstr[opstr.find("$"):opstr.find("+")]
+                            first_val = eval(first_orig.replace("$","0x"))
+
+                            second_orig = opstr[opstr.find("+$")+1:opstr.find("]")]
+                            second_val = eval(second_orig.replace("$","0x"))
+
+                            combined_val = "$%.4x" % (first_val + second_val)
+                            result = self.find_label(combined_val, bank_id)
+                            if result != None:
+                                combined_val = result
+
+                            replacetron = "[%s+%s]" % (first_orig, second_orig)
+                            opstr = opstr.replace(replacetron, "[%s]" % combined_val)
+
+                        output += spacing + opstr
+                        if include_comment:
+                            output += " ; " + hex(offset)
+                            if current_byte in relative_jumps:
+                                output += " $" + hex(rom[offset + 1])[2:]
+                        output += "\n"
+
+                        current_byte_number += 1
+                        offset += 1
+                        insertion = ""
 
-                current_byte_number += 1
-                offset += 1
-                include_comment = False
-            elif op_code_type == 2 and rom[offset] == op_code_byte:
-                oplen = len(op_code[0])
-                opstr = copy(op_code[0])
-                qes = op_code[0].count("?")
-                for x in range(0, qes):
-                    byte1 = rom[offset + 1]
-                    byte2 = rom[offset + 2]
-
-                    number = byte1
-                    number += byte2 << 8
-
-                    if current_byte not in call_commands + discrete_jumps + relative_jumps:
-                        pointer = get_global_address(number, bank_id)
-                        if pointer not in data_tables.keys():
-                            data_tables[pointer] = {}
-                            data_tables[pointer]['usage'] = 0
-                        else:
-                            data_tables[pointer]['usage'] += 1
-
-                    insertion = "$%.4x" % (number)
-                    result = find_label(insertion, bank_id)
-                    if result != None:
-                        insertion = result
-
-                    opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
-                    output += spacing + opstr #+ " ; " + hex(offset)
-                    output += "\n"
+                    current_byte_number += 1
+                    offset += 1
+                    include_comment = False
+                elif op_code_type == 2 and rom[offset] == op_code_byte:
+                    oplen = len(op_code[0])
+                    opstr = copy(op_code[0])
+                    qes = op_code[0].count("?")
+                    for x in range(0, qes):
+                        byte1 = rom[offset + 1]
+                        byte2 = rom[offset + 2]
+
+                        number = byte1
+                        number += byte2 << 8
+
+                        if current_byte not in call_commands + discrete_jumps + relative_jumps:
+                            pointer = get_global_address(number, bank_id)
+                            if pointer not in data_tables.keys():
+                                data_tables[pointer] = {}
+                                data_tables[pointer]['usage'] = 0
+                            else:
+                                data_tables[pointer]['usage'] += 1
+
+                        insertion = "$%.4x" % (number)
+                        result = self.find_label(insertion, bank_id)
+                        if result != None:
+                            insertion = result
 
-                    current_byte_number += 2
-                    offset += 2
+                        opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
+                        output += spacing + opstr #+ " ; " + hex(offset)
+                        output += "\n"
 
-                current_byte_number += 1
-                offset += 1
+                        current_byte_number += 2
+                        offset += 2
 
-                if current_byte == 0x21:
-                    last_hl_address = byte1 + (byte2 << 8)
-                if current_byte == 0xcd:
-                    if number == 0x3d97: used_3d97 = True
+                    current_byte_number += 1
+                    offset += 1
 
-                #duck out if this is jp $24d7
-                if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
-                    if current_byte == 0xc3:
+                    if current_byte == 0x21:
+                        last_hl_address = byte1 + (byte2 << 8)
+                    if current_byte == 0xcd:
                         if number == 0x3d97: used_3d97 = True
-                    #if number == 0x24d7: #jp
-                    if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
-                        keep_reading = False
-                        is_data = False
-                        break
-            else:
-                is_data = True
-        else:
-        #if is_data and keep_reading:
-            output += spacing + "db $" + hex(rom[offset])[2:] #+ " ; " + hex(offset)
-            output += "\n"
-            offset += 1
-            current_byte_number += 1
-            if offset in byte_labels.keys():
-                is_data = False
-                keep_reading = True
-        #else the while loop would have spit out the opcode
-
-        #these two are done prior
-        #offset += 1
-        #current_byte_number += 1
 
-        if not is_data and current_byte in relative_unconditional_jumps + end_08_scripts_with:
-            #stop reading at a jump, relative jump or return
-            if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
-                keep_reading = False
-                is_data = False #cleanup
-                break
-            elif offset not in byte_labels.keys() and offset in data_tables.keys():
+                    #duck out if this is jp $24d7
+                    if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
+                        if current_byte == 0xc3:
+                            if number == 0x3d97: used_3d97 = True
+                        #if number == 0x24d7: #jp
+                        if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
+                            keep_reading = False
+                            is_data = False
+                            break
+                else:
+                    is_data = True
+            else:
+            #if is_data and keep_reading:
+                output += spacing + "db $" + hex(rom[offset])[2:] #+ " ; " + hex(offset)
+                output += "\n"
+                offset += 1
+                current_byte_number += 1
+                if offset in byte_labels.keys():
+                    is_data = False
+                    keep_reading = True
+            #else the while loop would have spit out the opcode
+
+            #these two are done prior
+            #offset += 1
+            #current_byte_number += 1
+
+            if not is_data and current_byte in relative_unconditional_jumps + end_08_scripts_with:
+                #stop reading at a jump, relative jump or return
+                if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
+                    keep_reading = False
+                    is_data = False #cleanup
+                    break
+                elif offset not in byte_labels.keys() and offset in data_tables.keys():
+                    is_data = True
+                    keep_reading = True
+                else:
+                    is_data = False
+                    keep_reading = True
+                output += "\n"
+            elif is_data and offset not in byte_labels.keys():
                 is_data = True
                 keep_reading = True
             else:
                 is_data = False
                 keep_reading = True
-            output += "\n"
-        elif is_data and offset not in byte_labels.keys():
-            is_data = True
-            keep_reading = True
-        else:
-            is_data = False
-            keep_reading = True
-
-        if offset in data_tables.keys():
-            output = output.replace('$%x' % (get_local_address(offset)), data_label(offset))
-            output += data_label(offset) + '\n'
-            is_data = True
-            keep_reading = True
 
-        first_loop = False
-
-    #clean up unused labels
-    for label_line in byte_labels.keys():
-        address = label_line
-        label_line = byte_labels[label_line]
-        if label_line["usage"] == 0:
-            output = output.replace((label_line["name"] + "\n"), "")
-
-    #tone down excessive spacing
-    output = output.replace("\n\n\n","\n\n")
-
-    #add the offset of the final location
-    if include_last_address:
-        output += "; " + hex(offset)
-
-    return (output, offset, last_hl_address, last_a_address, used_3d97)
+            if offset in data_tables.keys():
+                output = output.replace('$%x' % (get_local_address(offset)), data_label(offset))
+                output += data_label(offset) + '\n'
+                is_data = True
+                keep_reading = True
 
-def has_outstanding_labels(byte_labels):
-    """
-    Check whether a label is used once in the asm output.
+            first_loop = False
 
-    If so, then that means it has to be called or specified later.
-    """
-    for label_line in byte_labels.keys():
-        real_line = byte_labels[label_line]
-        if real_line["definition"] == False: return True
-    return False
+        #clean up unused labels
+        for label_line in byte_labels.keys():
+            address = label_line
+            label_line = byte_labels[label_line]
+            if label_line["usage"] == 0:
+                output = output.replace((label_line["name"] + "\n"), "")
 
-def all_outstanding_labels_are_reverse(byte_labels, offset):
-    for label_id in byte_labels.keys():
-        line = byte_labels[label_id] # label_id is also the address
-        if line["definition"] == False:
-            if not label_id < offset: return False
-    return True
+        #tone down excessive spacing
+        output = output.replace("\n\n\n","\n\n")
 
+        #add the offset of the final location
+        if include_last_address:
+            output += "; " + hex(offset)
 
+        return (output, offset, last_hl_address, last_a_address, used_3d97)
 
 if __name__ == "__main__":
-    load_labels()
+    conf = config.Config()
+    disasm = Disassembler(conf)
+    disasm.initialize()
+
     addr = sys.argv[1]
     if ":" in addr:
         addr = addr.split(":")
         addr = int(addr[0], 16)*0x4000+(int(addr[1], 16)%0x4000)
     else:
-        label_addr = find_address_from_label(addr)
+        label_addr = disasm.find_address_from_label(addr)
         if label_addr:
             addr = label_addr
         else:
             addr = int(addr, 16)
-    print output_bank_opcodes(addr)[0]
+
+    output = disasm.output_bank_opcodes(addr)[0]
+    print output
diff --git a/pokemontools/labels.py b/pokemontools/labels.py
index ca411d1..2e50b89 100644
--- a/pokemontools/labels.py
+++ b/pokemontools/labels.py
@@ -3,7 +3,37 @@
 Various label/line-related functions.
 """
 
+import os
+import json
+
 import pointers
+import crystal
+
+class Labels(object):
+    """
+    Store all labels, parsed from "labels.json" inside the configured path.
+    """
+    filename = "labels.json"
+
+    def __init__(self, config):
+        """
+        Keep the config and work out where the labels file lives (no I/O).
+        """
+        self.config = config
+        self.path = os.path.join(self.config.path, Labels.filename)
+
+    def initialize(self):
+        """Run the label scan if the file is missing, then load self.labels."""
+        if not os.path.exists(self.path):
+            import logging # labels.py does not import logging itself
+            logging.info(
+                "Running crystal.scan_for_predefined_labels to create \"{0}\". Trying.."
+                .format(Labels.filename))
+            crystal.scan_for_predefined_labels()
+
+        # json has no read(); json.load parses straight from the file object
+        with open(self.path, "r") as labels_file:
+            self.labels = json.load(labels_file)
 
 def remove_quoted_text(line):
     """get rid of content inside quotes
diff --git a/pokemontools/preprocessor.py b/pokemontools/preprocessor.py
index d9373ac..026da41 100644
--- a/pokemontools/preprocessor.py
+++ b/pokemontools/preprocessor.py
@@ -5,13 +5,8 @@ Basic preprocessor for both pokecrystal and pokered.
 
 import sys
 
-from crystal import (
-    DataByteWordMacro,
-)
-
-default_macros = [
-    DataByteWordMacro,
-]
+import exceptions
+import crystal
 
 chars = {
 "ガ": 0x05,
@@ -278,16 +273,6 @@ chars = {
 "9": 0xFF
 }
 
-class PreprocessorException(Exception):
-    """
-    There was a problem in the preprocessor.
-    """
-
-class MacroException(PreprocessorException):
-    """
-    There was a problem with a macro.
-    """
-
 def separate_comment(l):
     """
     Separates asm and comments on a single line.
@@ -299,7 +284,10 @@ def separate_comment(l):
                 break
         if l[i] == "\"":
             in_quotes = not in_quotes
-    return l[:i], l[i:] or None
+    return (l[:i], l[i:]) or None
+
+def make_macro_table(macros):
+    return dict((entry.macro_name, entry) for entry in macros)
 
 def quote_translator(asm):
     """
@@ -387,38 +375,7 @@ def quote_translator(asm):
 
     return output
 
-def extract_token(asm):
-    return asm.split(" ")[0].strip()
-
-def make_macro_table(macros):
-    return dict(((macro.macro_name, macro) for macro in macros))
-
-def macro_test(asm, macro_table):
-    """
-    Returns a matching macro, or None/False.
-    """
-    # macros are determined by the first symbol on the line
-    token = extract_token(asm)
-
-    # skip db and dw since rgbasm handles those and they aren't macros
-    if token is not None and token not in ["db", "dw"] and token in macro_table:
-        return (macro_table[token], token)
-    else:
-        return (None, None)
-
-def is_based_on(something, base):
-    """
-    Checks whether or not 'something' is a class that is a subclass of a class
-    by name. This is a terrible hack but it removes a direct dependency on
-    existing macros.
-
-    Used by macro_translator.
-    """
-    options = [str(klass.__name__) for klass in something.__bases__]
-    options += [something.__name__]
-    return (base in options)
-
-def check_macro_sanity(params, macro, original_line):
+def check_macro_sanity(self, params, macro, original_line):
     """
     Checks whether or not the correct number of arguments are being passed to a
     certain macro. There are a number of possibilities based on the types of
@@ -441,12 +398,12 @@ def check_macro_sanity(params, macro, original_line):
             elif param_klass.size == 3:
                 allowed_length += 2 # bank and label
             else:
-                raise MacroException(
+                raise exceptions.MacroException(
                     "dunno what to do with a macro param with a size > 3 (size={size})"
                     .format(size=param_klass.size)
                 )
         else:
-            raise MacroException(
+            raise exceptions.MacroException(
                 "dunno what to do with this non db/dw macro param: {klass} in line {line}"
                 .format(klass=param_klass, line=original_line)
             )
@@ -461,7 +418,7 @@ def check_macro_sanity(params, macro, original_line):
     params_len = len(params)
 
     if params_len not in allowed_lengths:
-        raise PreprocessorException(
+        raise exceptions.PreprocessorException(
             "mismatched number of parameters ({count}, instead of any of {allowed}) on this line: {line}"
             .format(
                 count=params_len,
@@ -472,170 +429,223 @@ def check_macro_sanity(params, macro, original_line):
 
     return True
 
-def macro_translator(macro, token, line, show_original_lines=False, do_macro_sanity_check=False):
-    """
-    Converts a line with a macro into a rgbasm-compatible line.
+def extract_token(asm):
+    return asm.partition(" ")[0].strip()
 
-    @param show_original_lines: show lines before preprocessing in stdout
-    @param do_macro_sanity_check: helpful for debugging macros
+def is_based_on(something, base):
     """
-    if macro.macro_name != token:
-        raise MacroException("macro/token mismatch")
-
-    original_line = line
-
-    # remove trailing newline
-    if line[-1] == "\n":
-        line = line[:-1]
-    else:
-        original_line += "\n"
-
-    # remove first tab
-    has_tab = False
-    if line[0] == "\t":
-        has_tab = True
-        line = line[1:]
-
-    # remove duplicate whitespace (also trailing)
-    line = " ".join(line.split())
-
-    params = []
-
-    # check if the line has params
-    if " " in line:
-        # split the line into separate parameters
-        params = line.replace(token, "").split(",")
-
-        # check if there are no params (redundant)
-        if len(params) == 1 and params[0] == "":
-            raise MacroException("macro has no params?")
-
-    # write out a comment showing the original line
-    if show_original_lines:
-        sys.stdout.write("; original_line: " + original_line)
-
-    # rgbasm can handle "db" so no preprocessing is required, plus this wont be
-    # reached because of earlier checks in macro_test.
-    if macro.macro_name in ["db", "dw"]:
-        sys.stdout.write(original_line)
-        return
-
-    # certain macros don't need an initial byte written
-    # do: all scripting macros
-    # don't: signpost, warp_def, person_event, xy_trigger
-    if not macro.override_byte_check:
-        sys.stdout.write("db ${0:02X}\n".format(macro.id))
-
-    # Does the number of parameters on this line match any allowed number of
-    # parameters that the macro expects?
-    if do_macro_sanity_check:
-        check_macro_sanity(params, macro, original_line)
-
-    # used for storetext
-    correction = 0
-
-    output = ""
-
-    index = 0
-    while index < len(params):
-        param_type  = macro.param_types[index - correction]
-        description = param_type["name"]
-        param_klass = param_type["class"]
-        byte_type   = param_klass.byte_type # db or dw
-        size        = param_klass.size
-        param       = params[index].strip()
-
-        # param_klass.to_asm() won't work here because it doesn't
-        # include db/dw.
-
-        # some parameters are really multiple types of bytes
-        if (byte_type == "dw" and size != 2) or \
-           (byte_type == "db" and size != 1):
-
-            output += ("; " + description + "\n")
-
-            if   size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"):
-                # write the bank first
-                output += ("db " + param + "\n")
-                # write the pointer second
-                output += ("dw " + params[index+1].strip() + "\n")
-                index += 2
-                correction += 1
-            elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"):
-                # write the pointer first
-                output += ("dw " + param + "\n")
-                # write the bank second
-                output += ("db " + params[index+1].strip() + "\n")
-                index += 2
-                correction += 1
-            elif size == 3 and "from_asm" in dir(param_klass):
-                output += ("db " + param_klass.from_asm(param) + "\n")
-                index += 1
-            else:
-                raise MacroException(
-                    "dunno what to do with this macro param ({klass}) in line: {line}"
-                    .format(
-                        klass=param_klass,
-                        line=original_line,
-                    )
-                )
+    Checks whether or not 'something' is a class that is a subclass of a class
+    by name. This is a terrible hack but it removes a direct dependency on
+    existing macros.
 
-        # or just print out the byte
-        else:
-            output += (byte_type + " " + param + " ; " + description + "\n")
+    Used by macro_translator.
+    """
+    options = [str(klass.__name__) for klass in something.__bases__]
+    options += [something.__name__]
+    return (base in options)
 
-            index += 1
+class Preprocessor(object):
+    """
+    A wrapper around the actual preprocessing step. Because rgbasm can't handle
+    many of these macros.
+    """
 
-    sys.stdout.write(output)
+    default_macros = [
+        crystal.DataByteWordMacro,
+    ]
+
+    def __init__(self, config, macros=None):
+        """
+        Store config and build the macro table (default: default_macros).
+        """
+        self.config = config
+
+        # identity test: "== None" would invoke a custom __eq__ on macros
+        if macros is None:
+            macros = Preprocessor.default_macros
+        self.macros = macros
+        self.macro_table = make_macro_table(self.macros)
+
+    def preprocess(self, lines=None):
+        """
+        Feed every line to read_line; lines may be a list or a single string.
+        """
+        if not lines:
+            # nothing supplied (None or empty), so fall back to stdin
+            lines = sys.stdin.readlines()
+        elif not isinstance(lines, list):
+            # anything that is not a list is split on newlines
+            lines = lines.split("\n")
+
+        for line in lines:
+            self.read_line(line)
+
+    def read_line(self, l):
+        """
+        Preprocesses a given line of asm and writes the result to stdout.
+        """
+
+        if l in ["\n", ""] or l[0] == ";":
+            sys.stdout.write(l)
+            return # jump out early
+
+        # strip comments from asm
+        asm, comment = separate_comment(l)
+
+        # export all labels (only a ':' before the first quote counts)
+        if ':' in asm.split('"')[0] and "macro" not in asm.lower():
+            sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n')
+
+        # expect preprocessed .asm files
+        if "INCLUDE" in asm:
+            asm = asm.replace('.asm','.tx')
+            sys.stdout.write(asm)
 
-def read_line(l, macro_table):
-    """Preprocesses a given line of asm."""
+        # ascii string macro preserves the bytes as ascii (skip the translator)
+        elif len(asm) > 6 and ("ascii " == asm[:6] or "\tascii " == asm[:7]):
+            asm = asm.replace("ascii", "db", 1)
+            sys.stdout.write(asm)
 
-    if l in ["\n", ""] or l[0] == ";":
-        sys.stdout.write(l)
-        return # jump out early
+        # convert text to bytes when a quote appears (not in a comment)
+        elif "\"" in asm:
+            sys.stdout.write(quote_translator(asm))
 
-    # strip comments from asm
-    asm, comment = separate_comment(l)
+        # check against other preprocessor features
+        else:
+            macro, token = self.macro_test(asm)
+            if macro:
+                self.macro_translator(macro, token, asm)
+            else:
+                sys.stdout.write(asm)
 
-    # export all labels
-    if ':' in asm[:asm.find('"')] and "macro" not in asm.lower():
-        sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n')
+        if comment:
+            sys.stdout.write(comment)
 
-    # expect preprocessed .asm files
-    if "INCLUDE" in asm:
-        asm = asm.replace('.asm','.tx')
-        sys.stdout.write(asm)
+    def macro_translator(self, macro, token, line, show_original_lines=False, do_macro_sanity_check=False):
+        """
+        Converts a line with a macro into rgbasm-compatible lines on stdout.
 
-    # ascii string macro preserves the bytes as ascii (skip the translator)
-    elif len(asm) > 6 and ("ascii " == asm[:6] or "\tascii " == asm[:7]):
-        asm = asm.replace("ascii", "db", 1)
-        sys.stdout.write(asm)
+        @param show_original_lines: show lines before preprocessing in stdout
+        @param do_macro_sanity_check: helpful for debugging macros
+        """
+        if macro.macro_name != token:
+            raise exceptions.MacroException("macro/token mismatch")
 
-    # convert text to bytes when a quote appears (not in a comment)
-    elif "\"" in asm:
-        sys.stdout.write(quote_translator(asm))
+        original_line = line
 
-    # check against other preprocessor features
-    else:
-        macro, token = macro_test(asm, macro_table)
-        if macro:
-            macro_translator(macro, token, asm)
+        # remove trailing newline
+        if line[-1] == "\n":
+            line = line[:-1]
         else:
-            sys.stdout.write(asm)
+            original_line += "\n"
+
+        # remove first tab
+        has_tab = False
+        if line[0] == "\t":
+            has_tab = True
+            line = line[1:]
+
+        # remove duplicate whitespace (also trailing)
+        line = " ".join(line.split())
+
+        params = []
+
+        # check if the line has params
+        if " " in line:
+            # drop only the leading token, then split the parameters on ","
+            params = line.replace(token, "", 1).split(",")
+
+            # check if there are no params (redundant)
+            if len(params) == 1 and params[0] == "":
+                raise exceptions.MacroException("macro has no params?")
+
+        # write out a comment showing the original line
+        if show_original_lines:
+            sys.stdout.write("; original_line: " + original_line)
+
+        # rgbasm can handle "db" so no preprocessing is required, plus this wont be
+        # reached because of earlier checks in macro_test.
+        if macro.macro_name in ["db", "dw"]:
+            sys.stdout.write(original_line)
+            return
+
+        # certain macros don't need an initial byte written
+        # do: all scripting macros
+        # don't: signpost, warp_def, person_event, xy_trigger
+        if not macro.override_byte_check:
+            sys.stdout.write("db ${0:02X}\n".format(macro.id))
+
+        # Does the parameter count match what the macro expects? Note that
+        # check_macro_sanity is a module-level function, not a method.
+        if do_macro_sanity_check:
+            check_macro_sanity(self, params, macro, original_line)
+
+        # used for storetext
+        correction = 0
+
+        output = ""
+
+        index = 0
+        while index < len(params):
+            param_type  = macro.param_types[index - correction]
+            description = param_type["name"]
+            param_klass = param_type["class"]
+            byte_type   = param_klass.byte_type # db or dw
+            size        = param_klass.size
+            param       = params[index].strip()
+
+            # param_klass.to_asm() won't work here because it doesn't
+            # include db/dw.
+
+            # some parameters are really multiple types of bytes
+            if (byte_type == "dw" and size != 2) or \
+               (byte_type == "db" and size != 1):
+
+                output += ("; " + description + "\n")
+
+                if   size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"):
+                    # write the bank first
+                    output += ("db " + param + "\n")
+                    # write the pointer second
+                    output += ("dw " + params[index+1].strip() + "\n")
+                    index += 2
+                    correction += 1
+                elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"):
+                    # write the pointer first
+                    output += ("dw " + param + "\n")
+                    # write the bank second
+                    output += ("db " + params[index+1].strip() + "\n")
+                    index += 2
+                    correction += 1
+                elif size == 3 and "from_asm" in dir(param_klass):
+                    output += ("db " + param_klass.from_asm(param) + "\n")
+                    index += 1
+                else:
+                    raise exceptions.MacroException(
+                        "dunno what to do with this macro param ({klass}) in line: {line}"
+                        .format(
+                            klass=param_klass,
+                            line=original_line,
+                        )
+                    )
 
-    if comment:
-        sys.stdout.write(comment)
+            # or just print out the byte
+            else:
+                output += (byte_type + " " + param + " ; " + description + "\n")
+
+                index += 1
 
-def preprocess(macro_table, lines=None):
-    """Main entry point for the preprocessor."""
+        sys.stdout.write(output)
 
-    if not lines:
-        # read each line from stdin
-        lines = (sys.stdin.readlines())
-    elif not isinstance(lines, list):
-        # split up the input into individual lines
-        lines = lines.split("\n")
+    def macro_test(self, asm):
+        """
+        Returns (macro, token) for a matching macro, otherwise (None, None).
+        """
+        # macros are determined by the first symbol on the line
+        token = extract_token(asm)
 
-    for l in lines:
-        read_line(l, macro_table)
+        # skip db and dw since rgbasm handles those and they aren't macros
+        if token is not None and token not in ["db", "dw"] and token in self.macro_table:
+            return (self.macro_table[token], token)
+        else:
+            return (None, None)
diff --git a/pokemontools/wram.py b/pokemontools/wram.py
index 5a5fa75..60001aa 100644
--- a/pokemontools/wram.py
+++ b/pokemontools/wram.py
@@ -4,7 +4,15 @@ RGBDS BSS section and constant parsing.
 """
 
 import os
-path = os.path.dirname(os.path.abspath(__file__))
+
+def make_wram_labels(wram_sections):
+    """Group label names by address: {address: [label, ...]}."""
+    wram_labels = {}
+    for section in wram_sections:
+        for label in section['labels']:
+            # setdefault avoids rebuilding the key list for every label
+            wram_labels.setdefault(label['address'], []).append(label['label'])
+    return wram_labels
 
 def read_bss_sections(bss):
     sections = []
@@ -55,34 +63,6 @@ def read_bss_sections(bss):
     sections.append(section)
     return sections
 
-def read_wram_sections():
-    """
-    Opens the wram file and calls read_bss_sections.
-    """
-    wram_content = None
-    wram_file_path = os.path.join(os.path.dirname(path), 'wram.asm')
-    try:
-        wram_file_handler = open(wram_file_path, 'r')
-    except IOError as exception:
-        wram_content = [""]
-    else:
-        wram_content = wram_file_handler.readlines()
-    wram_sections = read_bss_sections(wram_content)
-    return wram_sections
-
-wram_sections = read_wram_sections()
-
-def make_wram_labels(wram_sections):
-    wram_labels = {}
-    for section in wram_sections:
-        for label in section['labels']:
-            if label['address'] not in wram_labels.keys():
-                wram_labels[label['address']] = []
-            wram_labels[label['address']] += [label['label']]
-    return wram_labels
-
-wram_labels = make_wram_labels(wram_sections)
-
 def constants_to_dict(constants):
     return dict((eval(constant[constant.find('EQU')+3:constant.find(';')].replace('$','0x')), constant[:constant.find('EQU')].strip()) for constant in constants)
 
@@ -95,31 +75,90 @@ def read_constants(filepath):
     """
     Load lines from a file and call scrape_constants.
     """
-    try:
-        file_handler = open(filepath, "r")
-    except IOError as exception:
-        lines = [""]
-    else:
+    lines = None
+
+    with open(filepath, "r") as file_handler:
         lines = file_handler.readlines()
+
     constants = scrape_constants(lines)
     return constants
 
-def read_hram_constants():
-    """
-    Load constants from hram.asm.
-    """
-    hram_path = os.path.join(os.path.dirname(path), 'hram.asm')
-    return read_constants(hram_path)
-
-# TODO: get rid of this global
-hram_constants = read_hram_constants()
-
-def read_gbhw_constants():
+class WRAMProcessor(object):
     """
-    Load constants from gbhw.asm.
+    RGBDS BSS section and constant parsing.
     """
-    gbhw_path = os.path.join(os.path.dirname(path), 'gbhw.asm')
-    return read_constants(gbhw_path)
 
-# TODO: get rid of this global
-gbhw_constants = read_gbhw_constants()
+    def __init__(self, config):
+        """
+        Keep the config and derive the three .asm paths from config.path.
+        """
+        self.config = config
+
+        # wram.asm, hram.asm and gbhw.asm are all looked up in config.path
+        self.paths = {}
+        for name in ("wram", "hram", "gbhw"):
+            self.paths[name] = os.path.join(self.config.path, name + ".asm")
+
+    def initialize(self):
+        """
+        Load WRAM sections and labels, then the hram and gbhw constants.
+        """
+        # order matters: the labels are built from self.wram_sections
+        for setup in (self.setup_wram_sections, self.setup_wram_labels,
+                      self.setup_hram_constants, self.setup_gbhw_constants):
+            setup()
+
+    def read_wram_sections(self):
+        """
+        Read the configured wram.asm and hand its lines to read_bss_sections.
+
+        Returns whatever read_bss_sections builds from the file's lines.
+        A missing or unreadable file raises the error from open().
+        """
+        with open(self.paths["wram"], "r") as wram_file:
+            # readlines keeps each line's trailing newline
+            source_lines = wram_file.readlines()
+
+        return read_bss_sections(source_lines)
+
+    def setup_wram_sections(self):
+        """
+        Cache read_wram_sections() on self.wram_sections and return it.
+        """
+        self.wram_sections = sections = self.read_wram_sections()
+        return sections
+
+    def setup_wram_labels(self):
+        """
+        Build self.wram_labels from self.wram_sections and return it.
+        """
+        self.wram_labels = labels = make_wram_labels(self.wram_sections)
+        return labels
+
+    def read_hram_constants(self):
+        """
+        Return read_constants() applied to the configured hram.asm path.
+        """
+        hram_path = self.paths["hram"]
+        return read_constants(hram_path)
+
+    def setup_hram_constants(self):
+        """
+        Cache read_hram_constants() on self.hram_constants and return it.
+        """
+        self.hram_constants = constants = self.read_hram_constants()
+        return constants
+
+    def read_gbhw_constants(self):
+        """
+        Return read_constants() applied to the configured gbhw.asm path.
+        """
+        gbhw_path = self.paths["gbhw"]
+        return read_constants(gbhw_path)
+
+    def setup_gbhw_constants(self):
+        """
+        Cache read_gbhw_constants() on self.gbhw_constants and return it.
+        """
+        self.gbhw_constants = constants = self.read_gbhw_constants()
+        return constants
author	Bryan Bishop <kanzure@gmail.com>	2013-09-04 20:38:11 -0700
committer	Bryan Bishop <kanzure@gmail.com>	2013-09-04 20:38:11 -0700
commit	ee05e2fe1d03e0e68c64cea09ec41ab70e12bc3a (patch)
tree	db3b846aa6c92acd4cbf6f4bc0f7e5fb566ade27
parent	c2712bb90f09083f0bfa786750be2a9b34105fa9 (diff)
parent	37441a35b13f3421ba0c0f234e2ee4bbc5db4b63 (diff)