From 849c0d132d9a9a9b2414d5914f41a83820be8b11 Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Thu, 26 Apr 2012 16:58:20 -0500 Subject: rename textpre.py -> preprocessor.py --- preprocessor.py | 524 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 preprocessor.py (limited to 'preprocessor.py') diff --git a/preprocessor.py b/preprocessor.py new file mode 100644 index 000000000..70750d437 --- /dev/null +++ b/preprocessor.py @@ -0,0 +1,524 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import sys + +from extras.crystal import * + +macros = command_classes + \ + [Warp, XYTrigger, Signpost, PeopleEvent, DataByteWordMacro] + +chars = { +"ガ": 0x05, +"ギ": 0x06, +"グ": 0x07, +"ゲ": 0x08, +"ゴ": 0x09, +"ザ": 0x0A, +"ジ": 0x0B, +"ズ": 0x0C, +"ゼ": 0x0D, +"ゾ": 0x0E, +"ダ": 0x0F, +"ヂ": 0x10, +"ヅ": 0x11, +"デ": 0x12, +"ド": 0x13, +"バ": 0x19, +"ビ": 0x1A, +"ブ": 0x1B, +"ボ": 0x1C, +"が": 0x26, +"ぎ": 0x27, +"ぐ": 0x28, +"げ": 0x29, +"ご": 0x2A, +"ざ": 0x2B, +"じ": 0x2C, +"ず": 0x2D, +"ぜ": 0x2E, +"ぞ": 0x2F, +"だ": 0x30, +"ぢ": 0x31, +"づ": 0x32, +"で": 0x33, +"ど": 0x34, +"ば": 0x3A, +"び": 0x3B, +"ぶ": 0x3C, +"べ": 0x3D, +"ぼ": 0x3E, +"パ": 0x40, +"ピ": 0x41, +"プ": 0x42, +"ポ": 0x43, +"ぱ": 0x44, +"ぴ": 0x45, +"ぷ": 0x46, +"ぺ": 0x47, +"ぽ": 0x48, +"ア": 0x80, +"イ": 0x81, +"ウ": 0x82, +"エ": 0x83, +"ォ": 0x84, +"カ": 0x85, +"キ": 0x86, +"ク": 0x87, +"ケ": 0x88, +"コ": 0x89, +"サ": 0x8A, +"シ": 0x8B, +"ス": 0x8C, +"セ": 0x8D, +"ソ": 0x8E, +"タ": 0x8F, +"チ": 0x90, +"ツ": 0x91, +"テ": 0x92, +"ト": 0x93, +"ナ": 0x94, +"ニ": 0x95, +"ヌ": 0x96, +"ネ": 0x97, +"ノ": 0x98, +"ハ": 0x99, +"ヒ": 0x9A, +"フ": 0x9B, +"ホ": 0x9C, +"マ": 0x9D, +"ミ": 0x9E, +"ム": 0x9F, +"メ": 0xA0, +"モ": 0xA1, +"ヤ": 0xA2, +"ユ": 0xA3, +"ヨ": 0xA4, +"ラ": 0xA5, +"ル": 0xA6, +"レ": 0xA7, +"ロ": 0xA8, +"ワ": 0xA9, +"ヲ": 0xAA, +"ン": 0xAB, +"ッ": 0xAC, +"ャ": 0xAD, +"ュ": 0xAE, +"ョ": 0xAF, +"ィ": 0xB0, +"あ": 0xB1, +"い": 0xB2, +"う": 0xB3, +"え": 0xB4, +"お": 0xB5, +"か": 0xB6, +"き": 0xB7, +"く": 0xB8, +"け": 0xB9, +"こ": 0xBA, +"さ": 0xBB, +"し": 0xBC, +"す": 0xBD, +"せ": 0xBE, +"そ": 0xBF, +"た": 0xC0, +"ち": 0xC1, +"つ": 0xC2, +"て": 0xC3, +"と": 0xC4, +"な": 0xC5, +"に": 0xC6, +"ぬ": 0xC7, +"ね": 0xC8, +"の": 0xC9, +"は": 0xCA, +"ひ": 0xCB, +"ふ": 0xCC, +"へ": 0xCD, +"ほ": 0xCE, +"ま": 0xCF, +"み": 0xD0, +"む": 0xD1, +"め": 0xD2, +"も": 0xD3, +"や": 0xD4, +"ゆ": 0xD5, +"よ": 0xD6, +"ら": 0xD7, +"り": 0xD8, +"る": 0xD9, +"れ": 0xDA, +"ろ": 0xDB, +"わ": 0xDC, +"を": 0xDD, +"ん": 0xDE, +"っ": 0xDF, +"ゃ": 0xE0, +"ゅ": 0xE1, +"ょ": 0xE2, +"ー": 0xE3, + + +"@": 0x50, +"#": 0x54, +"…": 0x75, + +"┌": 0x79, +"─": 0x7A, +"┐": 0x7B, +"│": 0x7C, +"└": 0x7D, +"┘": 0x7E, + +"№": 0x74, + +" ": 0x7F, +"A": 0x80, +"B": 0x81, +"C": 0x82, +"D": 0x83, +"E": 0x84, +"F": 0x85, +"G": 0x86, +"H": 0x87, +"I": 0x88, +"J": 0x89, +"K": 0x8A, +"L": 0x8B, +"M": 0x8C, +"N": 0x8D, +"O": 0x8E, +"P": 0x8F, +"Q": 0x90, +"R": 0x91, +"S": 0x92, +"T": 0x93, +"U": 0x94, +"V": 0x95, +"W": 0x96, +"X": 0x97, +"Y": 0x98, +"Z": 0x99, +"(": 0x9A, +")": 0x9B, +":": 0x9C, +";": 0x9D, +"[": 0x9E, +"]": 0x9F, +"a": 0xA0, +"b": 0xA1, +"c": 0xA2, +"d": 0xA3, +"e": 0xA4, +"f": 0xA5, +"g": 0xA6, +"h": 0xA7, +"i": 0xA8, +"j": 0xA9, +"k": 0xAA, +"l": 0xAB, +"m": 0xAC, +"n": 0xAD, +"o": 0xAE, +"p": 0xAF, +"q": 0xB0, +"r": 0xB1, +"s": 0xB2, +"t": 0xB3, +"u": 0xB4, +"v": 0xB5, +"w": 0xB6, +"x": 0xB7, +"y": 0xB8, +"z": 0xB9, +"Ä": 0xC0, +"Ö": 0xC1, +"Ü": 0xC2, +"ä": 0xC3, +"ö": 0xC4, +"ü": 0xC5, +"'d": 0xD0, +"'l": 0xD1, +"'m": 0xD2, +"'r": 0xD3, +"'s": 0xD4, +"'t": 0xD5, +"'v": 0xD6, +"'": 0xE0, +"-": 0xE3, +"?": 0xE6, +"!": 0xE7, +".": 0xE8, +"&": 0xE9, +"é": 0xEA, +"→": 0xEB, +"♂": 0xEF, +"¥": 0xF0, +"×": 0xF1, +"/": 0xF3, +",": 0xF4, +"♀": 0xF5, +"0": 0xF6, +"1": 0xF7, +"2": 0xF8, +"3": 0xF9, +"4": 0xFA, +"5": 0xFB, +"6": 0xFC, +"7": 0xFD, +"8": 0xFE, +"9": 0xFF +} + +def separate_comment(l): + """ Separates asm and comments on a single line. + """ + + asm = "" + comment = None + in_quotes = False + in_comment = False + + # token either belongs to the line or to the comment + for token in l: + if in_comment: + comment += token + elif in_quotes and token != "\"": + asm += token + elif in_quotes and token == "\"": + in_quotes = False + asm += token + elif not in_quotes and token == "\"": + in_quotes = True + asm += token + elif not in_quotes and token != "\"": + if token == ";": + in_comment = True + comment = ";" + else: + asm += token + return asm, comment + +def quote_translator(asm): + """ Writes asm with quoted text translated into bytes. + """ + + # split by quotes + asms = asm.split("\"") + + # skip asm that actually does use ASCII in quotes + lowasm = asms[0].lower() + if "section" in lowasm \ + or "include" in lowasm \ + or "incbin" in lowasm: + sys.stdout.write(asm) + return + + even = False + i = 0 + for token in asms: + i = i + 1 + + if even: + # token is a string to convert to byte values + while len(token): + # read a single UTF-8 codepoint + char = token[0] + if ord(char) >= 0xFC: + char = char + token[1:6] + token = token[6:] + elif ord(char) >= 0xF8: + char = char + token[1:5] + token = token[5:] + elif ord(char) >= 0xF0: + char = char + token[1:4] + token = token[4:] + elif ord(char) >= 0xE0: + char = char + token[1:3] + token = token[3:] + elif ord(char) >= 0xC0: + char = char + token[1:2] + token = token[2:] + else: + token = token[1:] + + # certain apostrophe-letter pairs are only a single byte + if char == "'" and \ + (token[0] == "d" or \ + token[0] == "l" or \ + token[0] == "m" or \ + token[0] == "r" or \ + token[0] == "s" or \ + token[0] == "t" or \ + token[0] == "v"): + char = char + token[0] + token = token[1:] + + sys.stdout.write("${0:02X}".format(chars[char])) + + if len(token): + sys.stdout.write(", ") + # if not even + else: + sys.stdout.write(token) + + even = not even + return + +def extract_token(asm): + token = asm.split(" ")[0].replace("\t", "").replace("\n", "") + return token + +def macro_test(asm): + """ Returns a matching macro, or None/False. + """ + + # macros are determined by the first symbol on the line + token = extract_token(asm) + + # check against all names + for macro in macros: + if macro.macro_name == token: + return macro, token + + return None, None + +def macro_translator(macro, token, line): + """ Converts a line with a macro into a rgbasm-compatible line. + """ + assert macro.macro_name == token, "macro/token mismatch" + + original_line = line + + # remove trailing newline + if line[-1] == "\n": + line = line[:-1] + else: + original_line += "\n" + + # remove first tab + has_tab = False + if line[0] == "\t": + has_tab = True + line = line[1:] + + # remove duplicate whitespace (also trailing) + line = " ".join(line.split()) + + params = [] + + # check if the line has params + if " " in line: + # split the line into separate parameters + params = line.replace(token, "").split(",") + + # check if there are no params (redundant) + if len(params) == 1 and params[0] == "": + raise Exception, "macro has no params?" + + # write out a comment showing the original line + sys.stdout.write("; original_line: " + original_line) + + # certain macros don't need an initial byte written + # do: all scripting macros + # don't: signpost, warp_def, person_event, xy_trigger + if not macro.override_byte_check: + sys.stdout.write("db $%.2x\n" % (macro.id)) + + # --- long-winded sanity check goes here --- + + # sanity check... this won't work because PointerLabelBeforeBank shows + # up as two params, so these two lengths will always be different. + #assert len(params) == len(macro.param_types), \ + # "mismatched number of parameters on this line: " + \ + # original_line + + # v2 sanity check :) although it sorta sucks that this loop happens twice? + allowed_length = 0 + for (index, param_type) in macro.param_types.items(): + param_klass = param_type["class"] + + if param_klass.byte_type == "db": + allowed_length += 1 # just one value + elif param_klass.byte_type == "dw": + if param_klass.size == 2: + allowed_length += 1 # just label + elif param_klass.size == 3: + allowed_length += 2 # bank and label + else: + raise Exception, "dunno what to do with a macro param with a size > 3" + else: + raise Exception, "dunno what to do with this non db/dw macro param: " + \ + str(param_klass) + " in line: " + original_line + + assert len(params) == allowed_length, \ + "mismatched number of parameters on this line: " + \ + original_line + + # --- end of ridiculously long sanity check --- + + index = 0 + while index < len(macro.param_types): + param_type = macro.param_types[index] + description = param_type["name"] + param_klass = param_type["class"] + byte_type = param_klass.byte_type # db or dw + size = param_klass.size + param = params[index].strip() + + # param_klass.to_asm() won't work here because it doesn't + # include db/dw. + + # some parameters are really multiple types of bytes + if (byte_type == "dw" and size != 2) or \ + (byte_type == "db" and size != 1): + + sys.stdout.write("; " + description + "\n") + + if size == 3 and issubclass(param_klass, PointerLabelBeforeBank): + # write the bank first + sys.stdout.write("db " + params[index] + "\n") + # write the pointer second + sys.stdout.write("dw " + params[index+1] + "\n") + index += 2 + elif size == 3 and issubclass(param_klass, PointerLabelAfterBank): + # write the pointer first + sys.stdout.write("dw " + params[index] + "\n") + # write the bank second + sys.stdout.write("db " + params[index+1] + "\n") + index += 2 + else: + raise Exception, "dunno what to do with this macro " + \ + "param (" + str(param_klass) + ") " + "on this line: " + \ + original_line + + # or just print out the byte + else: + sys.stdout.write(byte_type + " " + param + " ; " + description + "\n") + + index += 1 + +for l in sys.stdin: + # strip and store any comment on this line + if ";" in l: + asm, comment = separate_comment(l) + else: + asm = l + comment = None + + # convert text to bytes when a quote appears (not in a comment) + if "\"" in asm: + quote_translator(asm) + + # check against other preprocessor features + else: + macro, token = macro_test(asm) + + if macro: + macro_translator(macro, token, asm) + else: + sys.stdout.write(asm) + + # show line comment + if comment != None: + sys.stdout.write(comment) -- cgit v1.2.3