pokemontools/labels.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

# -*- coding: utf-8 -*-
"""
Various label/line-related functions.
"""
from __future__ import absolute_import

import os
import json
import logging

from . import pointers
from . import sym

class Labels(object):
    """
    Store all labels.
    """

    def __init__(self, config, filename="pokecrystal.sym"):
        """
        Setup the instance.
        """
        self.config = config
        self.filename = filename
        self.path = os.path.join(self.config.path, self.filename)

    def initialize(self):
        """
        Handle anything requiring file-loading and such.
        """
        # Look for a symfile if it's not given
        if not os.path.exists(self.path):
            self.filename = find_symfile_in_dir(self.config.path)
            if self.filename == None:
                raise Exception("Couldn't find any .sym files. Run rgblink -n to create a .sym file.")
            self.path = os.path.join(self.config.path, self.filename)

        self.labels = sym.read_symfile(self.path)

def find_symfile_in_dir(path):
    for filename in os.listdir(path):
        if os.path.splitext(filename)[1] == '.sym':
            return filename
    return None

def remove_quoted_text(line):
    """get rid of content inside quotes
    and also removes the quotes from the input string"""
    while line.count("\"") % 2 == 0 and line.count("\"") > 0:
        first = line.find("\"")
        second = line.find("\"", first+1)
        line = line[0:first] + line[second+1:]
    while line.count("\'") % 2 == 0 and line.count("'") > 0:
        first = line.find("\'")
        second = line.find("\'", first+1)
        line = line[0:first] + line[second+1:]
    return line

def line_has_comment_address(line, returnable={}, bank=None):
    """checks that a given line has a comment
    with a valid address, and returns the address in the object.
    Note: bank is required if you have a 4-letter-or-less address,
    because otherwise there is no way to figure out which bank
    is curretly being scanned."""
    #first set the bank/offset to nada
    returnable["bank"] = None
    returnable["offset"] = None
    returnable["address"] = None
    #only valid characters are 0-9a-fA-F
    valid = [str(x) for x in range(10)] + \
            [chr(x) for x in range(ord('a'), ord('f')+1)] + \
            [chr(x) for x in range(ord('A'), ord('F')+1)]
    #check if there is a comment in this line
    if ";" not in line:
        return False
    #first throw away anything in quotes
    if (line.count("\"") % 2 == 0 and line.count("\"")!=0) \
       or (line.count("\'") % 2 == 0 and line.count("\'")!=0):
        line = remove_quoted_text(line)
    #check if there is still a comment in this line after quotes removed
    if ";" not in line:
        return False
    #but even if there's a semicolon there must be later text
    if line[-1] == ";":
        return False
    #and just a space doesn't count
    if line[-2:] == "; ":
        return False
    #and multiple whitespace doesn't count either
    line = line.rstrip(" ").lstrip(" ")
    if line[-1] == ";":
        return False
    #there must be more content after the semicolon
    if len(line)-1 == line.find(";"):
        return False
    #split it up into the main comment part
    comment = line[line.find(";")+1:]
    #don't want no leading whitespace
    comment = comment.lstrip(" ").rstrip(" ")
    #split up multi-token comments into single tokens
    token = comment
    if " " in comment:
        #use the first token in the comment
        token = comment.split(" ")[0]
    if token in ["0x", "$", "x", ":"]:
        return False
    offset = None
    #process a token with a A:B format
    if ":" in token: #3:3F0A, $3:$3F0A, 0x3:0x3F0A, 3:3F0A
        #split up the token
        bank_piece = token.split(":")[0].lower()
        offset_piece = token.split(":")[1].lower()
        #filter out blanks/duds
        if bank_piece in ["$", "0x", "x"] \
        or offset_piece in ["$", "0x", "x"]:
            return False
        #they can't have both "$" and "x"
        if "$" in bank_piece and "x" in bank_piece:
            return False
        if "$" in offset_piece and "x" in offset_piece:
            return False
        #process the bank piece
        if "$" in bank_piece:
            bank_piece = bank_piece.replace("$", "0x")
        #check characters for validity?
        for c in bank_piece.replace("x", ""):
            if c not in valid:
                return False
        bank = int(bank_piece, 16)
        #process the offset piece
        if "$" in offset_piece:
            offset_piece = offset_piece.replace("$", "0x")
        #check characters for validity?
        for c in offset_piece.replace("x", ""):
            if c not in valid:
                return False
        if len(offset_piece) == 0:
            return None
        offset = int(offset_piece, 16)
    #filter out blanks/duds
    elif token in ["$", "0x", "x"]:
        return False
    #can't have both "$" and "x" in the number
    elif "$" in token and "x" in token:
        return False
    elif "x" in token and not "0x" in token: #it should be 0x
        return False
    elif "$" in token and not "x" in token:
        token = token.replace("$", "0x")
        offset = int(token, 16)
    elif "0x" in token and not "$" in token:
        offset = int(token, 16)
    else: #might just be "1" at this point
        token = token.lower()
        #check if there are bad characters
        for c in token:
            if c not in valid:
                return False
        offset = int(token, 16)
    if offset == None and bank == None:
        return False
    if bank == None:
        bank = pointers.calculate_bank(offset)
    returnable["bank"] = bank
    returnable["offset"] = offset
    returnable["address"] = pointers.calculate_pointer(offset, bank=bank)
    return True

def get_address_from_line_comment(line, bank=None):
    """
    wrapper for line_has_comment_address
    """
    returnable = {}
    result = line_has_comment_address(line, returnable=returnable, bank=bank)
    if not result:
        return False
    return returnable["address"]

def line_has_label(line):
    """returns True if the line has an asm label"""
    if not isinstance(line, str):
        raise Exception("can't check this type of object")
    line = line.rstrip(" ").lstrip(" ")
    line = remove_quoted_text(line)
    if ";" in line:
        line = line.split(";")[0]
    if 0 <= len(line) <= 1:
        return False
    if ":" not in line:
        return False
    if line[0] == ";":
        return False
    if line[0] == "\"":
        return False
    if line[0] == ":":
        return False
    return True

def get_label_from_line(line):
    """returns the label from the line"""
    #check if the line has a label
    if not line_has_label(line):
        return None
    #split up the line
    label = line.split(":")[0]
    return label

def find_labels_without_addresses(asm):
    """scans the asm source and finds labels that are unmarked"""
    without_addresses = []
    for (line_number, line) in enumerate(asm):
        if line_has_label(line):
            label = get_label_from_line(line)
            if not line_has_comment_address(line):
                without_addresses.append({"line_number": line_number, "line": line, "label": label})
    return without_addresses