1 files changed, 171 insertions, 0 deletions
diff --git a/pokemontools/labels.py b/pokemontools/labels.py
new file mode 100644
index 0000000..d553bdc
--- /dev/null
+++ b/pokemontools/labels.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+"""
+Various label/line-related functions.
+"""
+
+from pointers import (
+    calculate_pointer,
+    calculate_bank,
+)
+
+def remove_quoted_text(line):
+    """get rid of content inside quotes
+    and also removes the quotes from the input string"""
+    while line.count("\"") % 2 == 0 and line.count("\"") > 0:
+        first = line.find("\"")
+        second = line.find("\"", first+1)
+        line = line[0:first] + line[second+1:]
+    while line.count("\'") % 2 == 0 and line.count("'") > 0:
+        first = line.find("\'")
+        second = line.find("\'", first+1)
+        line = line[0:first] + line[second+1:]
+    return line
+
+def line_has_comment_address(line, returnable={}, bank=None):
+    """checks that a given line has a comment
+    with a valid address, and returns the address in the object.
+    Note: bank is required if you have a 4-letter-or-less address,
+    because otherwise there is no way to figure out which bank
+    is curretly being scanned."""
+    #first set the bank/offset to nada
+    returnable["bank"] = None
+    returnable["offset"] = None
+    returnable["address"] = None
+    #only valid characters are 0-9a-fA-F
+    valid = [str(x) for x in range(10)] + \
+            [chr(x) for x in range(ord('a'), ord('f')+1)] + \
+            [chr(x) for x in range(ord('A'), ord('F')+1)]
+    #check if there is a comment in this line
+    if ";" not in line:
+        return False
+    #first throw away anything in quotes
+    if (line.count("\"") % 2 == 0 and line.count("\"")!=0) \
+       or (line.count("\'") % 2 == 0 and line.count("\'")!=0):
+        line = remove_quoted_text(line)
+    #check if there is still a comment in this line after quotes removed
+    if ";" not in line:
+        return False
+    #but even if there's a semicolon there must be later text
+    if line[-1] == ";":
+        return False
+    #and just a space doesn't count
+    if line[-2:] == "; ":
+        return False
+    #and multiple whitespace doesn't count either
+    line = line.rstrip(" ").lstrip(" ")
+    if line[-1] == ";":
+        return False
+    #there must be more content after the semicolon
+    if len(line)-1 == line.find(";"):
+        return False
+    #split it up into the main comment part
+    comment = line[line.find(";")+1:]
+    #don't want no leading whitespace
+    comment = comment.lstrip(" ").rstrip(" ")
+    #split up multi-token comments into single tokens
+    token = comment
+    if " " in comment:
+        #use the first token in the comment
+        token = comment.split(" ")[0]
+    if token in ["0x", "$", "x", ":"]:
+        return False
+    offset = None
+    #process a token with a A:B format
+    if ":" in token: #3:3F0A, $3:$3F0A, 0x3:0x3F0A, 3:3F0A
+        #split up the token
+        bank_piece = token.split(":")[0].lower()
+        offset_piece = token.split(":")[1].lower()
+        #filter out blanks/duds
+        if bank_piece in ["$", "0x", "x"] \
+        or offset_piece in ["$", "0x", "x"]:
+            return False
+        #they can't have both "$" and "x"
+        if "$" in bank_piece and "x" in bank_piece:
+            return False
+        if "$" in offset_piece and "x" in offset_piece:
+            return False
+        #process the bank piece
+        if "$" in bank_piece:
+            bank_piece = bank_piece.replace("$", "0x")
+        #check characters for validity?
+        for c in bank_piece.replace("x", ""):
+            if c not in valid:
+                return False
+        bank = int(bank_piece, 16)
+        #process the offset piece
+        if "$" in offset_piece:
+            offset_piece = offset_piece.replace("$", "0x")
+        #check characters for validity?
+        for c in offset_piece.replace("x", ""):
+            if c not in valid:
+                return False
+        if len(offset_piece) == 0:
+            return None
+        offset = int(offset_piece, 16)
+    #filter out blanks/duds
+    elif token in ["$", "0x", "x"]:
+        return False
+    #can't have both "$" and "x" in the number
+    elif "$" in token and "x" in token:
+        return False
+    elif "x" in token and not "0x" in token: #it should be 0x
+        return False
+    elif "$" in token and not "x" in token:
+        token = token.replace("$", "0x")
+        offset = int(token, 16)
+    elif "0x" in token and not "$" in token:
+        offset = int(token, 16)
+    else: #might just be "1" at this point
+        token = token.lower()
+        #check if there are bad characters
+        for c in token:
+            if c not in valid:
+                return False
+        offset = int(token, 16)
+    if offset == None and bank == None:
+        return False
+    if bank == None:
+        bank = calculate_bank(offset)
+    returnable["bank"] = bank
+    returnable["offset"] = offset
+    returnable["address"] = calculate_pointer(offset, bank=bank)
+    return True
+
+def get_address_from_line_comment(line, bank=None):
+    """
+    wrapper for line_has_comment_address
+    """
+    returnable = {}
+    result = line_has_comment_address(line, returnable=returnable, bank=bank)
+    if not result:
+        return False
+    return returnable["address"]
+
+def line_has_label(line):
+    """returns True if the line has an asm label"""
+    if not isinstance(line, str):
+        raise Exception, "can't check this type of object"
+    line = line.rstrip(" ").lstrip(" ")
+    line = remove_quoted_text(line)
+    if ";" in line:
+        line = line.split(";")[0]
+    if 0 <= len(line) <= 1:
+        return False
+    if ":" not in line:
+        return False
+    if line[0] == ";":
+        return False
+    if line[0] == "\"":
+        return False
+    if "::" in line:
+        return False
+    return True
+
+def get_label_from_line(line):
+    """returns the label from the line"""
+    #check if the line has a label
+    if not line_has_label(line):
+        return None
+    #split up the line
+    label = line.split(":")[0]
+    return label