diff options
author | Bryan Bishop <kanzure@gmail.com> | 2013-09-04 20:38:11 -0700 |
---|---|---|
committer | Bryan Bishop <kanzure@gmail.com> | 2013-09-04 20:38:11 -0700 |
commit | ee05e2fe1d03e0e68c64cea09ec41ab70e12bc3a (patch) | |
tree | db3b846aa6c92acd4cbf6f4bc0f7e5fb566ade27 | |
parent | c2712bb90f09083f0bfa786750be2a9b34105fa9 (diff) | |
parent | 37441a35b13f3421ba0c0f234e2ee4bbc5db4b63 (diff) |
Merge pull request #10 from kanzure/config
Configuration for paths
-rw-r--r-- | pokemontools/__init__.py | 1 | ||||
-rw-r--r-- | pokemontools/config.py | 46 | ||||
-rw-r--r-- | pokemontools/exceptions.py | 15 | ||||
-rw-r--r-- | pokemontools/gbz80disasm.py | 650 | ||||
-rw-r--r-- | pokemontools/labels.py | 30 | ||||
-rw-r--r-- | pokemontools/preprocessor.py | 414 | ||||
-rw-r--r-- | pokemontools/wram.py | 139 |
7 files changed, 719 insertions, 576 deletions
diff --git a/pokemontools/__init__.py b/pokemontools/__init__.py index 8fb8b19..09331af 100644 --- a/pokemontools/__init__.py +++ b/pokemontools/__init__.py @@ -1,2 +1,3 @@ +import config import crystal import preprocessor diff --git a/pokemontools/config.py b/pokemontools/config.py new file mode 100644 index 0000000..656fab0 --- /dev/null +++ b/pokemontools/config.py @@ -0,0 +1,46 @@ +""" +Configuration +""" + +import os + +import exceptions + +class Config(object): + """ + The Config class handles all configuration for pokemontools. Other classes + and functions use a Config object to determine where expected files can be + located. + """ + + def __init__(self, **kwargs): + """ + Store all parameters. + """ + self._config = {} + + for (key, value) in kwargs.items(): + if key not in self.__dict__: + self._config[key] = value + else: + raise exceptions.ConfigException( + "Can't store \"{0}\" in configuration because the key conflicts with an existing property." + .format(key) + ) + + if "path" not in self._config: + self._config["path"] = os.getcwd() + + def __getattr__(self, key): + """ + Grab the value from the class properties, then check the configuration, + and raise an exception if nothing works. + """ + if key in self.__dict__: + return self.__dict__[key] + elif key in self._config: + return self._config[key] + else: + raise exceptions.ConfigException( + "no config found for \"{0}\"".format(key) + ) diff --git a/pokemontools/exceptions.py b/pokemontools/exceptions.py index 71d0da2..4de62eb 100644 --- a/pokemontools/exceptions.py +++ b/pokemontools/exceptions.py @@ -11,3 +11,18 @@ class TextScriptException(Exception): """ TextScript encountered an inconsistency or problem. """ + +class ConfigException(Exception): + """ + Configuration error. Maybe a missing config variable. + """ + +class PreprocessorException(Exception): + """ + There was a problem in the preprocessor. + """ + +class MacroException(PreprocessorException): + """ + There was a problem with a macro. + """ diff --git a/pokemontools/gbz80disasm.py b/pokemontools/gbz80disasm.py index 7499982..790388e 100644 --- a/pokemontools/gbz80disasm.py +++ b/pokemontools/gbz80disasm.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +""" +GBC disassembler +""" import os import sys @@ -7,22 +10,15 @@ from ctypes import c_int8 import random import json -from wram import * +import config +import crystal +import labels +import wram # New versions of json don't have read anymore. if not hasattr(json, "read"): json.read = json.loads -def load_rom(filename="../baserom.gbc"): - """ - Load the specified rom. - - If no rom is given, load "../baserom.gbc". - """ - global rom - rom = bytearray(open(filename,'rb').read()) - return rom - spacing = "\t" temp_opt_table = [ @@ -563,49 +559,6 @@ relative_unconditional_jumps = [0xc3, 0x18] call_commands = [0xdc, 0xd4, 0xc4, 0xcc, 0xcd] -all_labels = {} -def load_labels(filename="labels.json"): - """ - Load labels from specified file. - - If no filename is given, loads 'labels.json'. - """ - global all_labels - - # don't re-load labels each time - if all_labels != {}: - return - - if os.path.exists(filename): - all_labels = json.read(open(filename, "r").read()) - else: - print "You must run crystal.scan_for_predefined_labels() to create \"labels.json\". Trying..." - import crystal - crystal.scan_for_predefined_labels() - -def find_label(local_address, bank_id=0): - # keep an integer - if type(local_address) == str: - local_address = int(local_address.replace("$", "0x"), 16) - - if local_address < 0x8000: - for label_entry in all_labels: - if get_local_address(label_entry["address"]) == local_address: - if label_entry["bank"] == bank_id or label_entry["bank"] == 0: - return label_entry["label"] - if local_address in wram_labels.keys(): - return wram_labels[local_address][-1] - for constants in [gbhw_constants, hram_constants]: - if local_address in constants.keys() and local_address >= 0xff00: - return constants[local_address] - return None - -def find_address_from_label(label): - for label_entry in all_labels: - if label == label_entry["label"]: - return label_entry["address"] - return None - def asm_label(address): """ Return the ASM label using the address. @@ -627,320 +580,369 @@ def get_global_address(address, bank): return ".ASM_" + hex(address)[2:] -def output_bank_opcodes(original_offset, max_byte_count=0x4000, include_last_address=True, stop_at=[], debug=False): +def has_outstanding_labels(byte_labels): """ - Output bank opcodes. - - fs = current_address - b = bank_byte - in = input_data -- rom - bank_size = byte_count - i = offset - ad = end_address - a, oa = current_byte_number - - stop_at can be used to supply a list of addresses to not disassemble - over. This is useful if you know in advance that there are a lot of - fall-throughs. + Check whether a label is used once in the asm output. + + If so, then that means it has to be called or specified later. """ + for label_line in byte_labels.keys(): + real_line = byte_labels[label_line] + if real_line["definition"] == False: return True + return False - load_labels() - load_rom() - - bank_id = original_offset / 0x4000 - if debug: print "bank id is: " + str(bank_id) - - last_hl_address = None #for when we're scanning the main map script - last_a_address = None - used_3d97 = False - - global rom - offset = original_offset - current_byte_number = 0 #start from the beginning - - #we don't actually have an end address, but we'll just say $4000 - end_address = original_offset + max_byte_count - - byte_labels = {} - data_tables = {} - - first_loop = True - output = "" - keep_reading = True - is_data = False - while offset <= end_address and keep_reading: - current_byte = rom[offset] - maybe_byte = current_byte - - # stop at any address - if not first_loop and offset in stop_at: - keep_reading = False - break - - #first check if this byte already has a label - #if it does, use the label - #if not, generate a new label - if offset in byte_labels.keys(): - line_label = byte_labels[offset]["name"] - byte_labels[offset]["usage"] += 1 - output += "\n" - else: - line_label = asm_label(offset) - byte_labels[offset] = {} - byte_labels[offset]["name"] = line_label - byte_labels[offset]["usage"] = 0 - byte_labels[offset]["definition"] = True - output += line_label + "\n" #" ; " + hex(offset) + "\n" - - #find out if there's a two byte key like this - temp_maybe = maybe_byte - temp_maybe += ( rom[offset+1] << 8) - if not is_data and temp_maybe in opt_table.keys() and rom[offset+1]!=0: - opstr = opt_table[temp_maybe][0].lower() - - if "x" in opstr: - for x in range(0, opstr.count("x")): - insertion = rom[offset + 1] - insertion = "$" + hex(insertion)[2:] - - opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - - current_byte += 1 - offset += 1 - if "?" in opstr: - for y in range(0, opstr.count("?")): - byte1 = rom[offset + 1] - byte2 = rom[offset + 2] +def all_outstanding_labels_are_reverse(byte_labels, offset): + for label_id in byte_labels.keys(): + line = byte_labels[label_id] # label_id is also the address + if line["definition"] == False: + if not label_id < offset: return False + return True + +class Disassembler(object): + """ + GBC disassembler + """ + + def __init__(self, config): + """ + Setup the class instance. + """ + self.config = config + + self.wram = wram.WRAMProcessor(self.config) + self.labels = labels.Labels(self.config) + + def initialize(self): + """ + Setup the disassembler. + """ + self.wram.initialize() + self.labels.initialize() + + # TODO: fix how ROM is handled throughout the project. + rom_path = os.path.join(self.config.path, "baserom.gbc") + self.rom = bytearray(open(rom_path, "rb").read()) + + def find_label(self, local_address, bank_id=0): + # keep an integer + if type(local_address) == str: + local_address = int(local_address.replace("$", "0x"), 16) + + if local_address < 0x8000: + for label_entry in self.labels.labels: + if get_local_address(label_entry["address"]) == local_address: + if label_entry["bank"] == bank_id or label_entry["bank"] == 0: + return label_entry["label"] + if local_address in self.wram.wram_labels.keys(): + return self.wram.wram_labels[local_address][-1] + for constants in [self.wram.gbhw_constants, self.wram.hram_constants]: + if local_address in constants.keys() and local_address >= 0xff00: + return constants[local_address] + return None + + def find_address_from_label(self, label): + for label_entry in self.labels.labels: + if label == label_entry["label"]: + return label_entry["address"] + return None + + def output_bank_opcodes(self, original_offset, max_byte_count=0x4000, include_last_address=True, stop_at=[], debug=False): + """ + Output bank opcodes. + + fs = current_address + b = bank_byte + in = input_data -- rom + bank_size = byte_count + i = offset + ad = end_address + a, oa = current_byte_number + + stop_at can be used to supply a list of addresses to not disassemble + over. This is useful if you know in advance that there are a lot of + fall-throughs. + """ + + bank_id = original_offset / 0x4000 + if debug: print "bank id is: " + str(bank_id) + + last_hl_address = None #for when we're scanning the main map script + last_a_address = None + used_3d97 = False + + rom = self.rom + + offset = original_offset + current_byte_number = 0 #start from the beginning + + #we don't actually have an end address, but we'll just say $4000 + end_address = original_offset + max_byte_count + + byte_labels = {} + data_tables = {} + + first_loop = True + output = "" + keep_reading = True + is_data = False + while offset <= end_address and keep_reading: + current_byte = rom[offset] + maybe_byte = current_byte + + # stop at any address + if not first_loop and offset in stop_at: + keep_reading = False + break - number = byte1 - number += byte2 << 8; + #first check if this byte already has a label + #if it does, use the label + #if not, generate a new label + if offset in byte_labels.keys(): + line_label = byte_labels[offset]["name"] + byte_labels[offset]["usage"] += 1 + output += "\n" + else: + line_label = asm_label(offset) + byte_labels[offset] = {} + byte_labels[offset]["name"] = line_label + byte_labels[offset]["usage"] = 0 + byte_labels[offset]["definition"] = True + output += line_label + "\n" #" ; " + hex(offset) + "\n" - insertion = "$%.4x" % (number) + #find out if there's a two byte key like this + temp_maybe = maybe_byte + temp_maybe += ( rom[offset+1] << 8) + if not is_data and temp_maybe in opt_table.keys() and rom[offset+1]!=0: + opstr = opt_table[temp_maybe][0].lower() - opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() + if "x" in opstr: + for x in range(0, opstr.count("x")): + insertion = rom[offset + 1] + insertion = "$" + hex(insertion)[2:] - current_byte_number += 2 - offset += 2 + opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - output += spacing + opstr #+ " ; " + hex(offset) - output += "\n" + current_byte += 1 + offset += 1 + if "?" in opstr: + for y in range(0, opstr.count("?")): + byte1 = rom[offset + 1] + byte2 = rom[offset + 2] - current_byte_number += 2 - offset += 2 - elif not is_data and maybe_byte in opt_table.keys(): - op_code = opt_table[maybe_byte] - op_code_type = op_code[1] - op_code_byte = maybe_byte + number = byte1 + number += byte2 << 8; - #type = -1 when it's the E op - #if op_code_type != -1: - if op_code_type == 0 and rom[offset] == op_code_byte: - op_str = op_code[0].lower() + insertion = "$%.4x" % (number) - output += spacing + op_code[0].lower() #+ " ; " + hex(offset) + opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() + + current_byte_number += 2 + offset += 2 + + output += spacing + opstr #+ " ; " + hex(offset) output += "\n" - offset += 1 - current_byte_number += 1 - elif op_code_type == 1 and rom[offset] == op_code_byte: - oplen = len(op_code[0]) - opstr = copy(op_code[0]) - xes = op_code[0].count("x") - include_comment = False - for x in range(0, xes): - insertion = rom[offset + 1] - insertion = "$" + hex(insertion)[2:] - - if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz - #generate a label for the byte we're jumping to - target_address = offset + 2 + c_int8(rom[offset + 1]).value - if target_address in byte_labels.keys(): - byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] - line_label2 = byte_labels[target_address]["name"] - else: - line_label2 = asm_label(target_address) - byte_labels[target_address] = {} - byte_labels[target_address]["name"] = line_label2 - byte_labels[target_address]["usage"] = 1 - byte_labels[target_address]["definition"] = False - - insertion = line_label2 - if has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset): - include_comment = True - elif current_byte == 0x3e: - last_a_address = rom[offset + 1] - - opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - - # because the $ff00+$ff syntax is silly - if opstr.count("$") > 1 and "+" in opstr: - first_orig = opstr[opstr.find("$"):opstr.find("+")] - first_val = eval(first_orig.replace("$","0x")) - - second_orig = opstr[opstr.find("+$")+1:opstr.find("]")] - second_val = eval(second_orig.replace("$","0x")) - - combined_val = "$%.4x" % (first_val + second_val) - result = find_label(combined_val, bank_id) - if result != None: - combined_val = result + current_byte_number += 2 + offset += 2 + elif not is_data and maybe_byte in opt_table.keys(): + op_code = opt_table[maybe_byte] + op_code_type = op_code[1] + op_code_byte = maybe_byte - replacetron = "[%s+%s]" % (first_orig, second_orig) - opstr = opstr.replace(replacetron, "[%s]" % combined_val) + #type = -1 when it's the E op + #if op_code_type != -1: + if op_code_type == 0 and rom[offset] == op_code_byte: + op_str = op_code[0].lower() - output += spacing + opstr - if include_comment: - output += " ; " + hex(offset) - if current_byte in relative_jumps: - output += " $" + hex(rom[offset + 1])[2:] + output += spacing + op_code[0].lower() #+ " ; " + hex(offset) output += "\n" - current_byte_number += 1 offset += 1 - insertion = "" + current_byte_number += 1 + elif op_code_type == 1 and rom[offset] == op_code_byte: + oplen = len(op_code[0]) + opstr = copy(op_code[0]) + xes = op_code[0].count("x") + include_comment = False + for x in range(0, xes): + insertion = rom[offset + 1] + insertion = "$" + hex(insertion)[2:] + + if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz + #generate a label for the byte we're jumping to + target_address = offset + 2 + c_int8(rom[offset + 1]).value + if target_address in byte_labels.keys(): + byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] + line_label2 = byte_labels[target_address]["name"] + else: + line_label2 = asm_label(target_address) + byte_labels[target_address] = {} + byte_labels[target_address]["name"] = line_label2 + byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["definition"] = False + + insertion = line_label2 + if has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset): + include_comment = True + elif current_byte == 0x3e: + last_a_address = rom[offset + 1] + + opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() + + # because the $ff00+$ff syntax is silly + if opstr.count("$") > 1 and "+" in opstr: + first_orig = opstr[opstr.find("$"):opstr.find("+")] + first_val = eval(first_orig.replace("$","0x")) + + second_orig = opstr[opstr.find("+$")+1:opstr.find("]")] + second_val = eval(second_orig.replace("$","0x")) + + combined_val = "$%.4x" % (first_val + second_val) + result = self.find_label(combined_val, bank_id) + if result != None: + combined_val = result + + replacetron = "[%s+%s]" % (first_orig, second_orig) + opstr = opstr.replace(replacetron, "[%s]" % combined_val) + + output += spacing + opstr + if include_comment: + output += " ; " + hex(offset) + if current_byte in relative_jumps: + output += " $" + hex(rom[offset + 1])[2:] + output += "\n" + + current_byte_number += 1 + offset += 1 + insertion = "" - current_byte_number += 1 - offset += 1 - include_comment = False - elif op_code_type == 2 and rom[offset] == op_code_byte: - oplen = len(op_code[0]) - opstr = copy(op_code[0]) - qes = op_code[0].count("?") - for x in range(0, qes): - byte1 = rom[offset + 1] - byte2 = rom[offset + 2] - - number = byte1 - number += byte2 << 8 - - if current_byte not in call_commands + discrete_jumps + relative_jumps: - pointer = get_global_address(number, bank_id) - if pointer not in data_tables.keys(): - data_tables[pointer] = {} - data_tables[pointer]['usage'] = 0 - else: - data_tables[pointer]['usage'] += 1 - - insertion = "$%.4x" % (number) - result = find_label(insertion, bank_id) - if result != None: - insertion = result - - opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() - output += spacing + opstr #+ " ; " + hex(offset) - output += "\n" + current_byte_number += 1 + offset += 1 + include_comment = False + elif op_code_type == 2 and rom[offset] == op_code_byte: + oplen = len(op_code[0]) + opstr = copy(op_code[0]) + qes = op_code[0].count("?") + for x in range(0, qes): + byte1 = rom[offset + 1] + byte2 = rom[offset + 2] + + number = byte1 + number += byte2 << 8 + + if current_byte not in call_commands + discrete_jumps + relative_jumps: + pointer = get_global_address(number, bank_id) + if pointer not in data_tables.keys(): + data_tables[pointer] = {} + data_tables[pointer]['usage'] = 0 + else: + data_tables[pointer]['usage'] += 1 + + insertion = "$%.4x" % (number) + result = self.find_label(insertion, bank_id) + if result != None: + insertion = result - current_byte_number += 2 - offset += 2 + opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() + output += spacing + opstr #+ " ; " + hex(offset) + output += "\n" - current_byte_number += 1 - offset += 1 + current_byte_number += 2 + offset += 2 - if current_byte == 0x21: - last_hl_address = byte1 + (byte2 << 8) - if current_byte == 0xcd: - if number == 0x3d97: used_3d97 = True + current_byte_number += 1 + offset += 1 - #duck out if this is jp $24d7 - if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: - if current_byte == 0xc3: + if current_byte == 0x21: + last_hl_address = byte1 + (byte2 << 8) + if current_byte == 0xcd: if number == 0x3d97: used_3d97 = True - #if number == 0x24d7: #jp - if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): - keep_reading = False - is_data = False - break - else: - is_data = True - else: - #if is_data and keep_reading: - output += spacing + "db $" + hex(rom[offset])[2:] #+ " ; " + hex(offset) - output += "\n" - offset += 1 - current_byte_number += 1 - if offset in byte_labels.keys(): - is_data = False - keep_reading = True - #else the while loop would have spit out the opcode - - #these two are done prior - #offset += 1 - #current_byte_number += 1 - if not is_data and current_byte in relative_unconditional_jumps + end_08_scripts_with: - #stop reading at a jump, relative jump or return - if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): - keep_reading = False - is_data = False #cleanup - break - elif offset not in byte_labels.keys() and offset in data_tables.keys(): + #duck out if this is jp $24d7 + if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: + if current_byte == 0xc3: + if number == 0x3d97: used_3d97 = True + #if number == 0x24d7: #jp + if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): + keep_reading = False + is_data = False + break + else: + is_data = True + else: + #if is_data and keep_reading: + output += spacing + "db $" + hex(rom[offset])[2:] #+ " ; " + hex(offset) + output += "\n" + offset += 1 + current_byte_number += 1 + if offset in byte_labels.keys(): + is_data = False + keep_reading = True + #else the while loop would have spit out the opcode + + #these two are done prior + #offset += 1 + #current_byte_number += 1 + + if not is_data and current_byte in relative_unconditional_jumps + end_08_scripts_with: + #stop reading at a jump, relative jump or return + if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): + keep_reading = False + is_data = False #cleanup + break + elif offset not in byte_labels.keys() and offset in data_tables.keys(): + is_data = True + keep_reading = True + else: + is_data = False + keep_reading = True + output += "\n" + elif is_data and offset not in byte_labels.keys(): is_data = True keep_reading = True else: is_data = False keep_reading = True - output += "\n" - elif is_data and offset not in byte_labels.keys(): - is_data = True - keep_reading = True - else: - is_data = False - keep_reading = True - - if offset in data_tables.keys(): - output = output.replace('$%x' % (get_local_address(offset)), data_label(offset)) - output += data_label(offset) + '\n' - is_data = True - keep_reading = True - first_loop = False - - #clean up unused labels - for label_line in byte_labels.keys(): - address = label_line - label_line = byte_labels[label_line] - if label_line["usage"] == 0: - output = output.replace((label_line["name"] + "\n"), "") - - #tone down excessive spacing - output = output.replace("\n\n\n","\n\n") - - #add the offset of the final location - if include_last_address: - output += "; " + hex(offset) - - return (output, offset, last_hl_address, last_a_address, used_3d97) + if offset in data_tables.keys(): + output = output.replace('$%x' % (get_local_address(offset)), data_label(offset)) + output += data_label(offset) + '\n' + is_data = True + keep_reading = True -def has_outstanding_labels(byte_labels): - """ - Check whether a label is used once in the asm output. + first_loop = False - If so, then that means it has to be called or specified later. - """ - for label_line in byte_labels.keys(): - real_line = byte_labels[label_line] - if real_line["definition"] == False: return True - return False + #clean up unused labels + for label_line in byte_labels.keys(): + address = label_line + label_line = byte_labels[label_line] + if label_line["usage"] == 0: + output = output.replace((label_line["name"] + "\n"), "") -def all_outstanding_labels_are_reverse(byte_labels, offset): - for label_id in byte_labels.keys(): - line = byte_labels[label_id] # label_id is also the address - if line["definition"] == False: - if not label_id < offset: return False - return True + #tone down excessive spacing + output = output.replace("\n\n\n","\n\n") + #add the offset of the final location + if include_last_address: + output += "; " + hex(offset) + return (output, offset, last_hl_address, last_a_address, used_3d97) if __name__ == "__main__": - load_labels() + conf = config.Config() + disasm = Disassembler(conf) + disasm.initialize() + addr = sys.argv[1] if ":" in addr: addr = addr.split(":") addr = int(addr[0], 16)*0x4000+(int(addr[1], 16)%0x4000) else: - label_addr = find_address_from_label(addr) + label_addr = disasm.find_address_from_label(addr) if label_addr: addr = label_addr else: addr = int(addr, 16) - print output_bank_opcodes(addr)[0] + + output = disasm.output_bank_opcodes(addr)[0] + print output diff --git a/pokemontools/labels.py b/pokemontools/labels.py index ca411d1..2e50b89 100644 --- a/pokemontools/labels.py +++ b/pokemontools/labels.py @@ -3,7 +3,37 @@ Various label/line-related functions. """ +import os +import json + import pointers +import crystal + +class Labels(object): + """ + Store all labels. + """ + filename = "labels.json" + + def __init__(self, config): + """ + Setup the instance. + """ + self.config = config + self.path = os.path.join(self.config.path, Labels.filename) + + def initialize(self): + """ + Handle anything requiring file-loading and such. + """ + if not os.path.exists(self.path): + logging.info( + "Running crystal.scan_for_predefined_labels to create \"{0}\". Trying.." + .format(Labels.filename) + ) + crystal.scan_for_predefined_labels() + + self.labels = json.read(open(self.path, "r").read()) def remove_quoted_text(line): """get rid of content inside quotes diff --git a/pokemontools/preprocessor.py b/pokemontools/preprocessor.py index d9373ac..026da41 100644 --- a/pokemontools/preprocessor.py +++ b/pokemontools/preprocessor.py @@ -5,13 +5,8 @@ Basic preprocessor for both pokecrystal and pokered. import sys -from crystal import ( - DataByteWordMacro, -) - -default_macros = [ - DataByteWordMacro, -] +import exceptions +import crystal chars = { "ガ": 0x05, @@ -278,16 +273,6 @@ chars = { "9": 0xFF } -class PreprocessorException(Exception): - """ - There was a problem in the preprocessor. - """ - -class MacroException(PreprocessorException): - """ - There was a problem with a macro. - """ - def separate_comment(l): """ Separates asm and comments on a single line. @@ -299,7 +284,10 @@ def separate_comment(l): break if l[i] == "\"": in_quotes = not in_quotes - return l[:i], l[i:] or None + return (l[:i], l[i:]) or None + +def make_macro_table(macros): + return dict(((macro.macro_name, macro) for macro in macros)) def quote_translator(asm): """ @@ -387,38 +375,7 @@ def quote_translator(asm): return output -def extract_token(asm): - return asm.split(" ")[0].strip() - -def make_macro_table(macros): - return dict(((macro.macro_name, macro) for macro in macros)) - -def macro_test(asm, macro_table): - """ - Returns a matching macro, or None/False. - """ - # macros are determined by the first symbol on the line - token = extract_token(asm) - - # skip db and dw since rgbasm handles those and they aren't macros - if token is not None and token not in ["db", "dw"] and token in macro_table: - return (macro_table[token], token) - else: - return (None, None) - -def is_based_on(something, base): - """ - Checks whether or not 'something' is a class that is a subclass of a class - by name. This is a terrible hack but it removes a direct dependency on - existing macros. - - Used by macro_translator. - """ - options = [str(klass.__name__) for klass in something.__bases__] - options += [something.__name__] - return (base in options) - -def check_macro_sanity(params, macro, original_line): +def check_macro_sanity(self, params, macro, original_line): """ Checks whether or not the correct number of arguments are being passed to a certain macro. There are a number of possibilities based on the types of @@ -441,12 +398,12 @@ def check_macro_sanity(params, macro, original_line): elif param_klass.size == 3: allowed_length += 2 # bank and label else: - raise MacroException( + raise exceptions.MacroException( "dunno what to do with a macro param with a size > 3 (size={size})" .format(size=param_klass.size) ) else: - raise MacroException( + raise exceptions.MacroException( "dunno what to do with this non db/dw macro param: {klass} in line {line}" .format(klass=param_klass, line=original_line) ) @@ -461,7 +418,7 @@ def check_macro_sanity(params, macro, original_line): params_len = len(params) if params_len not in allowed_lengths: - raise PreprocessorException( + raise exceptions.PreprocessorException( "mismatched number of parameters ({count}, instead of any of {allowed}) on this line: {line}" .format( count=params_len, @@ -472,170 +429,223 @@ def check_macro_sanity(params, macro, original_line): return True -def macro_translator(macro, token, line, show_original_lines=False, do_macro_sanity_check=False): - """ - Converts a line with a macro into a rgbasm-compatible line. +def extract_token(asm): + return asm.split(" ")[0].strip() - @param show_original_lines: show lines before preprocessing in stdout - @param do_macro_sanity_check: helpful for debugging macros +def is_based_on(something, base): """ - if macro.macro_name != token: - raise MacroException("macro/token mismatch") - - original_line = line - - # remove trailing newline - if line[-1] == "\n": - line = line[:-1] - else: - original_line += "\n" - - # remove first tab - has_tab = False - if line[0] == "\t": - has_tab = True - line = line[1:] - - # remove duplicate whitespace (also trailing) - line = " ".join(line.split()) - - params = [] - - # check if the line has params - if " " in line: - # split the line into separate parameters - params = line.replace(token, "").split(",") - - # check if there are no params (redundant) - if len(params) == 1 and params[0] == "": - raise MacroException("macro has no params?") - - # write out a comment showing the original line - if show_original_lines: - sys.stdout.write("; original_line: " + original_line) - - # rgbasm can handle "db" so no preprocessing is required, plus this wont be - # reached because of earlier checks in macro_test. - if macro.macro_name in ["db", "dw"]: - sys.stdout.write(original_line) - return - - # certain macros don't need an initial byte written - # do: all scripting macros - # don't: signpost, warp_def, person_event, xy_trigger - if not macro.override_byte_check: - sys.stdout.write("db ${0:02X}\n".format(macro.id)) - - # Does the number of parameters on this line match any allowed number of - # parameters that the macro expects? - if do_macro_sanity_check: - check_macro_sanity(params, macro, original_line) - - # used for storetext - correction = 0 - - output = "" - - index = 0 - while index < len(params): - param_type = macro.param_types[index - correction] - description = param_type["name"] - param_klass = param_type["class"] - byte_type = param_klass.byte_type # db or dw - size = param_klass.size - param = params[index].strip() - - # param_klass.to_asm() won't work here because it doesn't - # include db/dw. - - # some parameters are really multiple types of bytes - if (byte_type == "dw" and size != 2) or \ - (byte_type == "db" and size != 1): - - output += ("; " + description + "\n") - - if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"): - # write the bank first - output += ("db " + param + "\n") - # write the pointer second - output += ("dw " + params[index+1].strip() + "\n") - index += 2 - correction += 1 - elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"): - # write the pointer first - output += ("dw " + param + "\n") - # write the bank second - output += ("db " + params[index+1].strip() + "\n") - index += 2 - correction += 1 - elif size == 3 and "from_asm" in dir(param_klass): - output += ("db " + param_klass.from_asm(param) + "\n") - index += 1 - else: - raise MacroException( - "dunno what to do with this macro param ({klass}) in line: {line}" - .format( - klass=param_klass, - line=original_line, - ) - ) + Checks whether or not 'something' is a class that is a subclass of a class + by name. This is a terrible hack but it removes a direct dependency on + existing macros. - # or just print out the byte - else: - output += (byte_type + " " + param + " ; " + description + "\n") + Used by macro_translator. + """ + options = [str(klass.__name__) for klass in something.__bases__] + options += [something.__name__] + return (base in options) - index += 1 +class Preprocessor(object): + """ + A wrapper around the actual preprocessing step. Because rgbasm can't handle + many of these macros. + """ - sys.stdout.write(output) + default_macros = [ + crystal.DataByteWordMacro, + ] + + def __init__(self, config, macros=None): + """ + Setup the preprocessor. + """ + self.config = config + + if macros == None: + macros = Preprocessor.default_macros + + self.macros = macros + self.macro_table = make_macro_table(self.macros) + + def preprocess(self, lines=None): + """ + Run the preprocessor against stdin. + """ + if not lines: + # read each line from stdin + lines = (sys.stdin.readlines()) + elif not isinstance(lines, list): + # split up the input into individual lines + lines = lines.split("\n") + + for l in lines: + self.read_line(l) + + def read_line(self, l): + """ + Preprocesses a given line of asm. + """ + + if l in ["\n", ""] or l[0] == ";": + sys.stdout.write(l) + return # jump out early + + # strip comments from asm + asm, comment = separate_comment(l) + + # export all labels + if ':' in asm[:asm.find('"')] and "macro" not in asm.lower(): + sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n') + + # expect preprocessed .asm files + if "INCLUDE" in asm: + asm = asm.replace('.asm','.tx') + sys.stdout.write(asm) -def read_line(l, macro_table): - """Preprocesses a given line of asm.""" + # ascii string macro preserves the bytes as ascii (skip the translator) + elif len(asm) > 6 and ("ascii " == asm[:6] or "\tascii " == asm[:7]): + asm = asm.replace("ascii", "db", 1) + sys.stdout.write(asm) - if l in ["\n", ""] or l[0] == ";": - sys.stdout.write(l) - return # jump out early + # convert text to bytes when a quote appears (not in a comment) + elif "\"" in asm: + sys.stdout.write(quote_translator(asm)) - # strip comments from asm - asm, comment = separate_comment(l) + # check against other preprocessor features + else: + macro, token = self.macro_test(asm) + if macro: + self.macro_translator(macro, token, asm) + else: + sys.stdout.write(asm) - # export all labels - if ':' in asm[:asm.find('"')] and "macro" not in asm.lower(): - sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n') + if comment: + sys.stdout.write(comment) - # expect preprocessed .asm files - if "INCLUDE" in asm: - asm = asm.replace('.asm','.tx') - sys.stdout.write(asm) + def macro_translator(self, macro, token, line, show_original_lines=False, do_macro_sanity_check=False): + """ + Converts a line with a macro into a rgbasm-compatible line. - # ascii string macro preserves the bytes as ascii (skip the translator) - elif len(asm) > 6 and ("ascii " == asm[:6] or "\tascii " == asm[:7]): - asm = asm.replace("ascii", "db", 1) - sys.stdout.write(asm) + @param show_original_lines: show lines before preprocessing in stdout + @param do_macro_sanity_check: helpful for debugging macros + """ + if macro.macro_name != token: + raise exceptions.MacroException("macro/token mismatch") - # convert text to bytes when a quote appears (not in a comment) - elif "\"" in asm: - sys.stdout.write(quote_translator(asm)) + original_line = line - # check against other preprocessor features - else: - macro, token = macro_test(asm, macro_table) - if macro: - macro_translator(macro, token, asm) + # remove trailing newline + if line[-1] == "\n": + line = line[:-1] else: - sys.stdout.write(asm) + original_line += "\n" + + # remove first tab + has_tab = False + if line[0] == "\t": + has_tab = True + line = line[1:] + + # remove duplicate whitespace (also trailing) + line = " ".join(line.split()) + + params = [] + + # check if the line has params + if " " in line: + # split the line into separate parameters + params = line.replace(token, "").split(",") + + # check if there are no params (redundant) + if len(params) == 1 and params[0] == "": + raise exceptions.MacroException("macro has no params?") + + # write out a comment showing the original line + if show_original_lines: + sys.stdout.write("; original_line: " + original_line) + + # rgbasm can handle "db" so no preprocessing is required, plus this wont be + # reached because of earlier checks in macro_test. + if macro.macro_name in ["db", "dw"]: + sys.stdout.write(original_line) + return + + # certain macros don't need an initial byte written + # do: all scripting macros + # don't: signpost, warp_def, person_event, xy_trigger + if not macro.override_byte_check: + sys.stdout.write("db ${0:02X}\n".format(macro.id)) + + # Does the number of parameters on this line match any allowed number of + # parameters that the macro expects? + if do_macro_sanity_check: + self.check_macro_sanity(params, macro, original_line) + + # used for storetext + correction = 0 + + output = "" + + index = 0 + while index < len(params): + param_type = macro.param_types[index - correction] + description = param_type["name"] + param_klass = param_type["class"] + byte_type = param_klass.byte_type # db or dw + size = param_klass.size + param = params[index].strip() + + # param_klass.to_asm() won't work here because it doesn't + # include db/dw. + + # some parameters are really multiple types of bytes + if (byte_type == "dw" and size != 2) or \ + (byte_type == "db" and size != 1): + + output += ("; " + description + "\n") + + if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"): + # write the bank first + output += ("db " + param + "\n") + # write the pointer second + output += ("dw " + params[index+1].strip() + "\n") + index += 2 + correction += 1 + elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"): + # write the pointer first + output += ("dw " + param + "\n") + # write the bank second + output += ("db " + params[index+1].strip() + "\n") + index += 2 + correction += 1 + elif size == 3 and "from_asm" in dir(param_klass): + output += ("db " + param_klass.from_asm(param) + "\n") + index += 1 + else: + raise exceptions.MacroException( + "dunno what to do with this macro param ({klass}) in line: {line}" + .format( + klass=param_klass, + line=original_line, + ) + ) - if comment: - sys.stdout.write(comment) + # or just print out the byte + else: + output += (byte_type + " " + param + " ; " + description + "\n") + + index += 1 -def preprocess(macro_table, lines=None): - """Main entry point for the preprocessor.""" + sys.stdout.write(output) - if not lines: - # read each line from stdin - lines = (sys.stdin.readlines()) - elif not isinstance(lines, list): - # split up the input into individual lines - lines = lines.split("\n") + def macro_test(self, asm): + """ + Returns a matching macro, or None/False. + """ + # macros are determined by the first symbol on the line + token = extract_token(asm) - for l in lines: - read_line(l, macro_table) + # skip db and dw since rgbasm handles those and they aren't macros + if token is not None and token not in ["db", "dw"] and token in self.macro_table: + return (self.macro_table[token], token) + else: + return (None, None) diff --git a/pokemontools/wram.py b/pokemontools/wram.py index 5a5fa75..60001aa 100644 --- a/pokemontools/wram.py +++ b/pokemontools/wram.py @@ -4,7 +4,15 @@ RGBDS BSS section and constant parsing. """ import os -path = os.path.dirname(os.path.abspath(__file__)) + +def make_wram_labels(wram_sections): + wram_labels = {} + for section in wram_sections: + for label in section['labels']: + if label['address'] not in wram_labels.keys(): + wram_labels[label['address']] = [] + wram_labels[label['address']] += [label['label']] + return wram_labels def read_bss_sections(bss): sections = [] @@ -55,34 +63,6 @@ def read_bss_sections(bss): sections.append(section) return sections -def read_wram_sections(): - """ - Opens the wram file and calls read_bss_sections. - """ - wram_content = None - wram_file_path = os.path.join(os.path.dirname(path), 'wram.asm') - try: - wram_file_handler = open(wram_file_path, 'r') - except IOError as exception: - wram_content = [""] - else: - wram_content = wram_file_handler.readlines() - wram_sections = read_bss_sections(wram_content) - return wram_sections - -wram_sections = read_wram_sections() - -def make_wram_labels(wram_sections): - wram_labels = {} - for section in wram_sections: - for label in section['labels']: - if label['address'] not in wram_labels.keys(): - wram_labels[label['address']] = [] - wram_labels[label['address']] += [label['label']] - return wram_labels - -wram_labels = make_wram_labels(wram_sections) - def constants_to_dict(constants): return dict((eval(constant[constant.find('EQU')+3:constant.find(';')].replace('$','0x')), constant[:constant.find('EQU')].strip()) for constant in constants) @@ -95,31 +75,90 @@ def read_constants(filepath): """ Load lines from a file and call scrape_constants. """ - try: - file_handler = open(filepath, "r") - except IOError as exception: - lines = [""] - else: + lines = None + + with open(filepath, "r") as file_handler: lines = file_handler.readlines() + constants = scrape_constants(lines) return constants -def read_hram_constants(): - """ - Load constants from hram.asm. - """ - hram_path = os.path.join(os.path.dirname(path), 'hram.asm') - return read_constants(hram_path) - -# TODO: get rid of this global -hram_constants = read_hram_constants() - -def read_gbhw_constants(): +class WRAMProcessor(object): """ - Load constants from gbhw.asm. + RGBDS BSS section and constant parsing. """ - gbhw_path = os.path.join(os.path.dirname(path), 'gbhw.asm') - return read_constants(gbhw_path) -# TODO: get rid of this global -gbhw_constants = read_gbhw_constants() + def __init__(self, config): + """ + Setup for WRAM parsing. + """ + self.config = config + + self.paths = {} + self.paths["wram"] = os.path.join(self.config.path, "wram.asm") + self.paths["hram"] = os.path.join(self.config.path, "hram.asm") + self.paths["gbhw"] = os.path.join(self.config.path, "gbhw.asm") + + def initialize(self): + """ + Read constants. + """ + self.setup_wram_sections() + self.setup_wram_labels() + self.setup_hram_constants() + self.setup_gbhw_constants() + + def read_wram_sections(self): + """ + Opens the wram file and calls read_bss_sections. + """ + wram_content = None + wram_file_path = self.paths["wram"] + + with open(wram_file_path, "r") as wram: + wram_content = wram.readlines() + + wram_sections = read_bss_sections(wram_content) + return wram_sections + + def setup_wram_sections(self): + """ + Call read_wram_sections and set a variable. + """ + self.wram_sections = self.read_wram_sections() + return self.wram_sections + + def setup_wram_labels(self): + """ + Make wram labels based on self.wram_sections as input. + """ + self.wram_labels = make_wram_labels(self.wram_sections) + return self.wram_labels + + def read_hram_constants(self): + """ + Read constants from hram.asm using read_constants. + """ + hram_constants = read_constants(self.paths["hram"]) + return hram_constants + + def setup_hram_constants(self): + """ + Call read_hram_constants and set a variable. + """ + self.hram_constants = self.read_hram_constants() + return self.hram_constants + + def read_gbhw_constants(self): + """ + Read constants from gbhw.asm using read_constants. + """ + gbhw_constants = read_constants(self.paths["gbhw"]) + return gbhw_constants + + def setup_gbhw_constants(self): + """ + Call read_gbhw_constants and set a variable. + """ + self.gbhw_constants = self.read_gbhw_constants() + return self.gbhw_constants |