summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bryan Bishop <kanzure@gmail.com> 2013-08-31 10:55:49 -0700
committer Bryan Bishop <kanzure@gmail.com> 2013-08-31 10:55:49 -0700
commit 8c6328543a4ac2c6e7ba805e3c536589deca9d6d (patch)
tree dc8aa0a8e6e1ef794986c68b27c3d500c3a7c8f8
parent 4209ad8637c33e846e2e1935061590707b820a1c (diff)
parent 4998994c9a624f6298f8a54a665c788825c3e8f9 (diff)
Merge pull request #1 from kanzure/preprocessor
Also have the common bits of the preprocessor.
-rw-r--r-- pokemontools/__init__.py 1
-rw-r--r-- pokemontools/preprocessor.py 641
2 files changed, 642 insertions, 0 deletions
diff --git a/pokemontools/__init__.py b/pokemontools/__init__.py
index 833709b..8fb8b19 100644
--- a/pokemontools/__init__.py
+++ b/pokemontools/__init__.py
@@ -1 +1,2 @@
import crystal
+import preprocessor
diff --git a/pokemontools/preprocessor.py b/pokemontools/preprocessor.py
new file mode 100644
index 0000000..87faefd
--- /dev/null
+++ b/pokemontools/preprocessor.py
@@ -0,0 +1,641 @@
+# -*- coding: utf-8 -*-
+"""
+Basic preprocessor for both pokecrystal and pokered.
+"""
+
+import sys
+
+from crystal import (
+ DataByteWordMacro,
+)
+
# Macro classes offered as a default set. Nothing in this module reads the
# list; presumably callers hand it to make_macro_table() -- verify against
# the call sites.
default_macros = [
    DataByteWordMacro,
]
+
# Maps a character (or an apostrophe-letter pair such as "'s") to the byte
# value that quote_translator() emits for it. Several byte values are shared
# between the kana and the Latin sections (e.g. "ア" and "A" are both 0x80).
chars = {
    # katakana with dakuten
    "ガ": 0x05,
    "ギ": 0x06,
    "グ": 0x07,
    "ゲ": 0x08,
    "ゴ": 0x09,
    "ザ": 0x0A,
    "ジ": 0x0B,
    "ズ": 0x0C,
    "ゼ": 0x0D,
    "ゾ": 0x0E,
    "ダ": 0x0F,
    "ヂ": 0x10,
    "ヅ": 0x11,
    "デ": 0x12,
    "ド": 0x13,
    "バ": 0x19,
    "ビ": 0x1A,
    "ブ": 0x1B,
    "ボ": 0x1C,

    # hiragana with dakuten
    "が": 0x26,
    "ぎ": 0x27,
    "ぐ": 0x28,
    "げ": 0x29,
    "ご": 0x2A,
    "ざ": 0x2B,
    "じ": 0x2C,
    "ず": 0x2D,
    "ぜ": 0x2E,
    "ぞ": 0x2F,
    "だ": 0x30,
    "ぢ": 0x31,
    "づ": 0x32,
    "で": 0x33,
    "ど": 0x34,
    "ば": 0x3A,
    "び": 0x3B,
    "ぶ": 0x3C,
    "べ": 0x3D,
    "ぼ": 0x3E,

    # kana with handakuten
    "パ": 0x40,
    "ピ": 0x41,
    "プ": 0x42,
    "ポ": 0x43,
    "ぱ": 0x44,
    "ぴ": 0x45,
    "ぷ": 0x46,
    "ぺ": 0x47,
    "ぽ": 0x48,

    # katakana
    "ア": 0x80,
    "イ": 0x81,
    "ウ": 0x82,
    "エ": 0x83,
    # NOTE(review): this key is the small "ォ"; the surrounding ア-エ run
    # suggests full-size "オ" may have been intended -- confirm.
    "ォ": 0x84,
    "カ": 0x85,
    "キ": 0x86,
    "ク": 0x87,
    "ケ": 0x88,
    "コ": 0x89,
    "サ": 0x8A,
    "シ": 0x8B,
    "ス": 0x8C,
    "セ": 0x8D,
    "ソ": 0x8E,
    "タ": 0x8F,
    "チ": 0x90,
    "ツ": 0x91,
    "テ": 0x92,
    "ト": 0x93,
    "ナ": 0x94,
    "ニ": 0x95,
    "ヌ": 0x96,
    "ネ": 0x97,
    "ノ": 0x98,
    "ハ": 0x99,
    "ヒ": 0x9A,
    "フ": 0x9B,
    "ホ": 0x9C,
    "マ": 0x9D,
    "ミ": 0x9E,
    "ム": 0x9F,
    "メ": 0xA0,
    "モ": 0xA1,
    "ヤ": 0xA2,
    "ユ": 0xA3,
    "ヨ": 0xA4,
    "ラ": 0xA5,
    "ル": 0xA6,
    "レ": 0xA7,
    "ロ": 0xA8,
    "ワ": 0xA9,
    "ヲ": 0xAA,
    "ン": 0xAB,
    "ッ": 0xAC,
    "ャ": 0xAD,
    "ュ": 0xAE,
    "ョ": 0xAF,
    "ィ": 0xB0,

    # hiragana
    "あ": 0xB1,
    "い": 0xB2,
    "う": 0xB3,
    "え": 0xB4,
    "お": 0xB5,
    "か": 0xB6,
    "き": 0xB7,
    "く": 0xB8,
    "け": 0xB9,
    "こ": 0xBA,
    "さ": 0xBB,
    "し": 0xBC,
    "す": 0xBD,
    "せ": 0xBE,
    "そ": 0xBF,
    "た": 0xC0,
    "ち": 0xC1,
    "つ": 0xC2,
    "て": 0xC3,
    "と": 0xC4,
    "な": 0xC5,
    "に": 0xC6,
    "ぬ": 0xC7,
    "ね": 0xC8,
    "の": 0xC9,
    "は": 0xCA,
    "ひ": 0xCB,
    "ふ": 0xCC,
    "へ": 0xCD,
    "ほ": 0xCE,
    "ま": 0xCF,
    "み": 0xD0,
    "む": 0xD1,
    "め": 0xD2,
    "も": 0xD3,
    "や": 0xD4,
    "ゆ": 0xD5,
    "よ": 0xD6,
    "ら": 0xD7,
    "り": 0xD8,
    "る": 0xD9,
    "れ": 0xDA,
    "ろ": 0xDB,
    "わ": 0xDC,
    "を": 0xDD,
    "ん": 0xDE,
    "っ": 0xDF,
    "ゃ": 0xE0,
    "ゅ": 0xE1,
    "ょ": 0xE2,
    "ー": 0xE3,
    "ァ": 0xE9,

    # control-like symbols
    "@": 0x50,
    "#": 0x54,
    "…": 0x75,

    # box-drawing characters
    "┌": 0x79,
    "─": 0x7A,
    "┐": 0x7B,
    "│": 0x7C,
    "└": 0x7D,
    "┘": 0x7E,

    "№": 0x74,

    # space, Latin capitals and brackets
    " ": 0x7F,
    "A": 0x80,
    "B": 0x81,
    "C": 0x82,
    "D": 0x83,
    "E": 0x84,
    "F": 0x85,
    "G": 0x86,
    "H": 0x87,
    "I": 0x88,
    "J": 0x89,
    "K": 0x8A,
    "L": 0x8B,
    "M": 0x8C,
    "N": 0x8D,
    "O": 0x8E,
    "P": 0x8F,
    "Q": 0x90,
    "R": 0x91,
    "S": 0x92,
    "T": 0x93,
    "U": 0x94,
    "V": 0x95,
    "W": 0x96,
    "X": 0x97,
    "Y": 0x98,
    "Z": 0x99,
    "(": 0x9A,
    ")": 0x9B,
    ":": 0x9C,
    ";": 0x9D,
    "[": 0x9E,
    "]": 0x9F,

    # Latin lowercase
    "a": 0xA0,
    "b": 0xA1,
    "c": 0xA2,
    "d": 0xA3,
    "e": 0xA4,
    "f": 0xA5,
    "g": 0xA6,
    "h": 0xA7,
    "i": 0xA8,
    "j": 0xA9,
    "k": 0xAA,
    "l": 0xAB,
    "m": 0xAC,
    "n": 0xAD,
    "o": 0xAE,
    "p": 0xAF,
    "q": 0xB0,
    "r": 0xB1,
    "s": 0xB2,
    "t": 0xB3,
    "u": 0xB4,
    "v": 0xB5,
    "w": 0xB6,
    "x": 0xB7,
    "y": 0xB8,
    "z": 0xB9,

    # umlauts
    "Ä": 0xC0,
    "Ö": 0xC1,
    "Ü": 0xC2,
    "ä": 0xC3,
    "ö": 0xC4,
    "ü": 0xC5,

    # apostrophe-letter pairs that quote_translator() treats as one character
    "'d": 0xD0,
    "'l": 0xD1,
    "'m": 0xD2,
    "'r": 0xD3,
    "'s": 0xD4,
    "'t": 0xD5,
    "'v": 0xD6,

    # punctuation and symbols
    "'": 0xE0,
    "-": 0xE3,
    "?": 0xE6,
    "!": 0xE7,
    ".": 0xE8,
    "&": 0xE9,
    "é": 0xEA,
    "→": 0xEB,
    "▷": 0xEC,
    "▶": 0xED,
    "▼": 0xEE,
    "♂": 0xEF,
    "¥": 0xF0,
    "×": 0xF1,
    "/": 0xF3,
    ",": 0xF4,
    "♀": 0xF5,

    # digits
    "0": 0xF6,
    "1": 0xF7,
    "2": 0xF8,
    "3": 0xF9,
    "4": 0xFA,
    "5": 0xFB,
    "6": 0xFC,
    "7": 0xFD,
    "8": 0xFE,
    "9": 0xFF,
}
+
class PreprocessorException(Exception):
    """Base error raised when the preprocessor cannot handle its input."""
+
class MacroException(PreprocessorException):
    """Preprocessor error that concerns a macro or one of its parameters."""
+
def separate_comment(l):
    """
    Separates asm and comments on a single line.

    A ";" starts a comment unless it appears inside a double-quoted string.

    @param l: one line of source, with or without its trailing newline
    @return: a tuple (asm, comment). With a comment present, comment runs from
        the ";" to the end of the line. Without one, a trailing newline is
        still split off and returned in the comment slot (read_line writes the
        asm first and the comment afterwards, which restores the line break);
        if there is no newline either, comment is None.
    """
    in_quotes = False
    for index, char in enumerate(l):
        if char == ";" and not in_quotes:
            return l[:index], l[index:]
        if char == "\"":
            in_quotes = not in_quotes

    # No comment marker was found. Only a newline may be peeled off here; the
    # previous index-based version cut off the last real character of lines
    # that lacked a newline, and failed outright on an empty line.
    if l.endswith("\n"):
        return l[:-1], "\n"
    return l, None
+
def _split_characters(token):
    """
    Splits quoted text into the units that are looked up in the chars table.

    A Python 2 byte string is treated as UTF-8 and cut into whole multi-byte
    sequences based on the lead byte. Any other string already yields one code
    point per element. An apostrophe followed by one of "dlmrstv" is kept
    together as a single two-character unit.
    """
    utf8_bytes = isinstance(token, bytes)
    characters = []
    while token:
        lead = ord(token[0])
        if not utf8_bytes or lead < 0xc0:
            length = 1
            # certain apostrophe-letter pairs are considered a single character
            if token[0] == "'" and len(token) > 1 and token[1] in 'dlmrstv':
                length = 2
        elif lead < 0xe0:
            length = 2
        elif lead < 0xf0:
            length = 3
        elif lead < 0xf8:
            length = 4
        elif lead < 0xfc:
            length = 5
        else:
            length = 6
        characters.append(token[:length])
        token = token[length:]
    return characters


def _layout_print_text(characters):
    """
    Returns the byte values for the text of a "print" line.

    While more than 18 characters remain and the text does not end in "@", the
    text is cut after the last space found within the first 18 characters. A
    space at the cut is dropped, and a line-ending byte is inserted that
    alternates between 0x4f and 0x51. The result always ends with 0x57.
    """
    values = []
    line = 0
    while len(characters) > 18 and characters[-1] != '@':
        last_char = 1
        for i, _ in enumerate(characters):
            last_char = i + 1
            if ' ' not in characters[i + 1:18]:
                break
        values.extend(chars[char] for char in characters[:last_char - 1])
        if characters[last_char - 1] != " ":
            # look the character up; formatting the character itself as hex
            # raised ValueError
            values.append(chars[characters[last_char - 1]])
        values.append(0x51 if line & 1 else 0x4f)
        line += 1
        characters = characters[last_char:]

    # whatever is left fits on the current line as-is
    values.extend(chars[char] for char in characters)
    # end text
    values.append(0x57)
    return values


def quote_translator(asm):
    """
    Writes asm with quoted text translated into bytes.

    The line is split on double quotes. Every quoted piece is replaced by a
    comma-separated list of "$XX" byte values taken from the chars table, and
    the pieces outside quotes are copied unchanged. Lines whose first unquoted
    piece contains SECTION, INCBIN or INCLUDE are returned untouched. A line
    that starts with the word "print" becomes a "db 0," line with line-ending
    bytes inserted into the text.

    @param asm: a line of asm that contains at least one double quote
    @return: the translated line
    @raise KeyError: if the quoted text has a character missing from chars
    """
    # split by quotes; the odd-numbered pieces are the quoted strings
    asms = asm.split('"')

    # skip asm that actually does use ASCII in quotes
    if "SECTION" in asms[0] or "INCBIN" in asms[0] or "INCLUDE" in asms[0]:
        return asm

    print_macro = False
    if asms[0].strip() == 'print':
        asms[0] = asms[0].replace('print', 'db 0,')
        print_macro = True

    output = ''
    for position, token in enumerate(asms):
        if position % 2 == 0:
            output += token
            continue

        characters = _split_characters(token)
        if print_macro:
            values = _layout_print_text(characters)
        else:
            values = [chars[char] for char in characters]

        # joining the whole list avoids the stray ", " that piecewise
        # concatenation produced when a wrapped line had an empty prefix
        output += ", ".join("${0:02X}".format(value) for value in values)

    return output
+
def extract_token(asm):
    """
    Returns the text before the first space of a line, with surrounding
    whitespace (such as a leading tab) stripped.
    """
    first_word, _, _ = asm.partition(" ")
    return first_word.strip()
+
def make_macro_table(macros):
    """
    Builds a lookup table that maps each macro's macro_name to the macro.

    If two macros share a name, the later one in the sequence wins.
    """
    table = {}
    for macro in macros:
        table[macro.macro_name] = macro
    return table
+
def macro_test(asm, macro_table):
    """
    Looks up the macro named by the first symbol on the line.

    @param asm: the line to inspect
    @param macro_table: dict of macro name to macro
    @return: (macro, token) when the first symbol names a macro in the table,
        otherwise (None, None)
    """
    # macros are determined by the first symbol on the line
    token = extract_token(asm)

    # db and dw are handled by rgbasm itself and are never treated as macros
    if token is None or token in ["db", "dw"] or token not in macro_table:
        return (None, None)

    return (macro_table[token], token)
+
def is_based_on(something, base):
    """
    Tells whether the class 'something' is named 'base' or lists a class with
    that name among its direct bases. Only something.__bases__ is inspected,
    so more distant ancestors do not count.

    Comparing by name is a hack, but it removes a direct dependency on the
    existing macro classes.

    Used by macro_translator.
    """
    if something.__name__ == base:
        return True
    return any(str(klass.__name__) == base for klass in something.__bases__)
+
def check_macro_sanity(params, macro, original_line):
    """
    Verifies that a macro was given an acceptable number of arguments.

    The expected count is derived from macro.param_types: a "db" parameter
    takes one argument, a "dw" parameter of size 2 takes one (a label) and a
    "dw" parameter of size 3 takes two (a bank and a label). Any counts listed
    in macro.allowed_lengths, if that attribute exists, are accepted as well.

    @param params: a list of parameters given to the macro
    @param macro: macro klass
    @param original_line: the line being preprocessed
    @return: True when the count is acceptable
    @raise MacroException: for a parameter that is neither db nor dw, or a dw
        parameter whose size is neither 2 nor 3
    @raise PreprocessorException: when the number of parameters does not match
    """
    expected = 0

    for spec in macro.param_types.values():
        klass = spec["class"]
        kind = klass.byte_type

        if kind == "db":
            expected += 1 # just one value
            continue

        if kind != "dw":
            raise MacroException(
                "dunno what to do with this non db/dw macro param: {klass} in line {line}"
                .format(klass=klass, line=original_line)
            )

        if klass.size == 2:
            expected += 1 # just label
        elif klass.size == 3:
            expected += 2 # bank and label
        else:
            raise MacroException(
                "dunno what to do with a macro param with a size > 3 (size={size})"
                .format(size=klass.size)
            )

    # sometimes the allowed length can vary
    acceptable = getattr(macro, "allowed_lengths", []) + [expected]

    given = len(params)
    if given in acceptable:
        return True

    raise PreprocessorException(
        "mismatched number of parameters ({count}, instead of any of {allowed}) on this line: {line}"
        .format(
            count=given,
            allowed=acceptable,
            line=original_line,
        )
    )
+
def macro_translator(macro, token, line, show_original_lines=False, do_macro_sanity_check=False):
    """
    Converts a line with a macro into rgbasm-compatible lines, written to
    stdout.

    @param macro: macro klass; its macro_name, id, override_byte_check and
        param_types attributes are read
    @param token: the macro name found on the line; must equal
        macro.macro_name
    @param line: the asm line, with or without a trailing newline
    @param show_original_lines: show lines before preprocessing in stdout
    @param do_macro_sanity_check: helpful for debugging macros
    @raise MacroException: on a macro/token mismatch, when the line contains a
        space but no parameters, or for a multi-byte parameter type that this
        function does not know how to write
    """
    if macro.macro_name != token:
        raise MacroException("macro/token mismatch")

    original_line = line

    # remove trailing newline
    if line.endswith("\n"):
        line = line[:-1]
    else:
        original_line += "\n"

    # remove first tab
    if line.startswith("\t"):
        line = line[1:]

    # remove duplicate whitespace (also trailing)
    line = " ".join(line.split())

    params = []

    # check if the line has params
    if " " in line:
        # Drop only the leading macro name. Removing every occurrence would
        # also eat the name out of parameters that happen to contain it
        # (for example "jump .jump_here").
        params = line.replace(token, "", 1).split(",")

        # check if there are no params (redundant)
        if len(params) == 1 and params[0] == "":
            raise MacroException("macro has no params?")

    # write out a comment showing the original line
    if show_original_lines:
        sys.stdout.write("; original_line: " + original_line)

    # rgbasm can handle "db" so no preprocessing is required, plus this wont be
    # reached because of earlier checks in macro_test.
    if macro.macro_name in ["db", "dw"]:
        sys.stdout.write(original_line)
        return

    # certain macros don't need an initial byte written
    # do: all scripting macros
    # don't: signpost, warp_def, person_event, xy_trigger
    if not macro.override_byte_check:
        sys.stdout.write("db ${0:02X}\n".format(macro.id))

    # Does the number of parameters on this line match any allowed number of
    # parameters that the macro expects?
    if do_macro_sanity_check:
        check_macro_sanity(params, macro, original_line)

    # Number of extra arguments consumed so far by two-argument parameters;
    # keeps the index into macro.param_types aligned with the index into
    # params. (used for storetext)
    correction = 0

    output = ""

    index = 0
    while index < len(params):
        param_type = macro.param_types[index - correction]
        description = param_type["name"]
        param_klass = param_type["class"]
        byte_type = param_klass.byte_type # db or dw
        size = param_klass.size
        param = params[index].strip()

        # param_klass.to_asm() won't work here because it doesn't
        # include db/dw.

        # some parameters are really multiple types of bytes
        if (byte_type == "dw" and size != 2) or \
           (byte_type == "db" and size != 1):

            output += ("; " + description + "\n")

            if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"):
                # write the bank first
                output += ("db " + param + "\n")
                # write the pointer second
                output += ("dw " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"):
                # write the pointer first
                output += ("dw " + param + "\n")
                # write the bank second
                output += ("db " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and "from_asm" in dir(param_klass):
                output += ("db " + param_klass.from_asm(param) + "\n")
                index += 1
            else:
                raise MacroException(
                    "dunno what to do with this macro param ({klass}) in line: {line}"
                    .format(
                        klass=param_klass,
                        line=original_line,
                    )
                )

        # or just print out the byte
        else:
            output += (byte_type + " " + param + " ; " + description + "\n")

            index += 1

    sys.stdout.write(output)
+
def read_line(l, macro_table):
    """
    Preprocesses a given line of asm and writes the result to stdout.

    Blank lines and lines that start with ";" are copied unchanged. For any
    other line the comment is split off and the asm part is handled by the
    first matching rule below; the comment is then written after it:

    - INCLUDE lines have ".asm" replaced with ".tx"
    - lines starting with "ascii " have that word replaced with "db"
    - lines containing a double quote go through quote_translator
    - lines whose first symbol is in macro_table go through macro_translator
    - anything else is copied unchanged

    A "GLOBAL <label>" line is written first for any asm that has a ":"
    outside of quoted text.

    @param l: the line to preprocess
    @param macro_table: dict of macro name to macro, as built by
        make_macro_table
    """
    if l in ["\n", ""] or l[0] == ";":
        sys.stdout.write(l)
        return # jump out early

    # strip comments from asm
    asm, comment = separate_comment(l)

    # export all labels
    # Only the text before the first quote is searched. When there is no
    # quote at all, find() returns -1 and the whole asm must be searched;
    # slicing with -1 would hide a colon in the last position ("Label:").
    quote_at = asm.find('"')
    unquoted = asm if quote_at == -1 else asm[:quote_at]
    if ':' in unquoted:
        sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n')

    # expect preprocessed .asm files
    if "INCLUDE" in asm:
        asm = asm.replace('.asm','.tx')
        sys.stdout.write(asm)

    # ascii string macro preserves the bytes as ascii (skip the translator)
    elif len(asm) > 6 and asm.startswith(("ascii ", "\tascii ")):
        asm = asm.replace("ascii", "db", 1)
        sys.stdout.write(asm)

    # convert text to bytes when a quote appears (not in a comment)
    elif "\"" in asm:
        sys.stdout.write(quote_translator(asm))

    # check against other preprocessor features
    else:
        macro, token = macro_test(asm, macro_table)
        if macro:
            macro_translator(macro, token, asm)
        else:
            sys.stdout.write(asm)

    if comment:
        sys.stdout.write(comment)
+
def preprocess(macro_table, lines=None):
    """
    Main entry point for the preprocessor.

    @param macro_table: dict of macro name to macro, passed on to read_line
    @param lines: the input to preprocess. A string is split into lines; any
        other iterable is taken as a sequence of lines. When omitted or empty,
        the lines are read from stdin.
    """
    if not lines:
        # read each line from stdin
        lines = sys.stdin.readlines()
    elif hasattr(lines, "split"):
        # Split a single string into individual lines, keeping the "\n" on
        # each one: read_line copies line terminators through to the output,
        # so dropping them here would glue all of the output together.
        pieces = lines.split("\n")
        lines = [piece + "\n" for piece in pieces[:-1]]
        if pieces[-1]:
            lines.append(pieces[-1])

    for l in lines:
        read_line(l, macro_table)