path: root/pkmnasm/asmlex.py
author    Bryan Bishop <kanzure@gmail.com>    2012-04-26 00:31:53 -0500
committer Bryan Bishop <kanzure@gmail.com>    2012-04-26 00:31:53 -0500
commit    dc5ed571f03b2f0f316acb4c24f1d9cced1cf34a (patch)
tree      ec0b853c8aebcd27a51fabc718feea78a5973f74 /pkmnasm/asmlex.py
parent    d69fa4710ee761a601c7538ea4b0e9c7bd6165d7 (diff)
simple lexer
original-commit-id: ad76f259ff28b719b353581a654b4918f8ebb3b7
Diffstat (limited to 'pkmnasm/asmlex.py')
-rw-r--r--  pkmnasm/asmlex.py  494
1 file changed, 494 insertions, 0 deletions
diff --git a/pkmnasm/asmlex.py b/pkmnasm/asmlex.py
new file mode 100644
index 0000000..9f61ab3
--- /dev/null
+++ b/pkmnasm/asmlex.py
@@ -0,0 +1,494 @@
+# -*- coding: utf-8 -*-
+
+import ply.lex as lex
+import sys, os
+
+
+FILENAME = '' # Current filename
+
+
+_tokens = ('STRING', 'NEWLINE', 'LABEL',
+ 'ID', 'COMMA', 'PLUS', 'MINUS', 'LP', 'RP', 'MUL', 'DIV', 'POW',
+ 'UMINUS', 'APO', 'INTEGER', 'ADDR', 'RB', 'LB',
+ 'LOCALLABEL', 'LSHIFT', 'RSHIFT', 'BITWISE_OR', 'BITWISE_AND',
+ 'LOGICAL_NOT', 'BITWISE_COMPLEMENT',
+ )
+
+reserved_instructions = {
+ 'adc': 'ADC',
+ 'add': 'ADD',
+ 'and': 'AND',
+ 'bit': 'BIT',
+ 'call': 'CALL',
+ 'ccf': 'CCF',
+ 'cp': 'CP',
+ 'cpd': 'CPD',
+ 'cpdr': 'CPDR',
+ 'cpi': 'CPI',
+ 'cpir': 'CPIR',
+ 'cpl': 'CPL',
+ 'daa': 'DAA',
+ 'dec': 'DEC',
+ 'di': 'DI',
+ 'djnz': 'DJNZ',
+ 'ei': 'EI',
+ 'ex': 'EX',
+ 'exx': 'EXX',
+ 'halt': 'HALT',
+ 'im': 'IM',
+ 'in': 'IN',
+ 'inc': 'INC',
+ 'ind': 'IND',
+ 'indr': 'INDR',
+ 'ini': 'INI',
+ 'inir': 'INIR',
+ 'jp': 'JP',
+ 'jr': 'JR',
+ 'ld': 'LD',
+ 'ldd': 'LDD',
+ 'lddr': 'LDDR',
+ 'ldi': 'LDI',
+ 'ldir': 'LDIR',
+ 'neg': 'NEG',
+ 'nop': 'NOP',
+ 'or': 'OR',
+ 'otdr': 'OTDR',
+ 'otir': 'OTIR',
+ 'out': 'OUT',
+ 'outd': 'OUTD',
+ 'outi': 'OUTI',
+ 'pop': 'POP',
+ 'push': 'PUSH',
+ 'res': 'RES',
+ 'ret': 'RET',
+ 'reti': 'RETI',
+ 'retn': 'RETN',
+ 'rl': 'RL',
+ 'rla': 'RLA',
+ 'rlc': 'RLC',
+ 'rlca': 'RLCA',
+ 'rld': 'RLD',
+ 'rr': 'RR',
+ 'rra': 'RRA',
+ 'rrc': 'RRC',
+ 'rrca': 'RRCA',
+ 'rrd': 'RRD',
+ 'rst': 'RST',
+ 'sbc': 'SBC',
+ 'scf': 'SCF',
+ 'set': 'SET',
+ 'sla': 'SLA',
+ 'sll': 'SLL',
+ 'sra': 'SRA',
+ 'srl': 'SRL',
+ 'sub': 'SUB',
+ 'xor': 'XOR',
+ }
+
+
+pseudo = { # pseudo ops
+ 'align': 'ALIGN',
+ 'org': 'ORG',
+ 'defb': 'DEFB',
+ 'defm': 'DEFB',
+ 'db' : 'DEFB',
+ 'defs': 'DEFS',
+ 'defw': 'DEFW',
+ 'ds' : 'DEFS',
+ 'dw' : 'DEFW',
+ 'equ': 'EQU',
+ 'proc': 'PROC',
+ 'endp': 'ENDP',
+ 'local': 'LOCAL',
+ 'end': 'END',
+ 'incbin': 'INCBIN'
+ }
+
+
+regs8 = {'a': 'A',
+ 'b': 'B', 'c': 'C',
+ 'd': 'D', 'e': 'E',
+ 'h': 'H', 'l': 'L',
+ 'i': 'I', 'r': 'R',
+ 'ixh': 'IXH', 'ixl': 'IXL',
+ 'iyh': 'IYH', 'iyl': 'IYL'
+ }
+
+
+regs16 = {
+ 'af': 'AF',
+ 'bc': 'BC',
+ 'de': 'DE',
+ 'hl': 'HL',
+ 'ix': 'IX',
+ 'iy': 'IY',
+ 'sp': 'SP'
+}
+
+
+flags = {
+ 'z' : 'Z',
+ 'nz' : 'NZ',
+ 'nc' : 'NC',
+ 'po' : 'PO',
+ 'pe' : 'PE',
+ 'p' : 'P',
+ 'm' : 'M',
+}
+
+
+preprocessor = {
+ 'init' : '_INIT',
+ 'line' : '_LINE'
+}
+
+
+
+# List of token names.
+_tokens = _tokens \
+ + tuple(reserved_instructions.values()) \
+ + tuple(pseudo.values()) \
+ + tuple(regs8.values()) \
+ + tuple(regs16.values()) \
+ + tuple(flags.values()) \
+ + tuple(preprocessor.values())
+
+
+def get_uniques(l):
+    ''' Returns a list with no repeated elements, preserving order.
+    '''
+ result = []
+
+ for i in l:
+ if i not in result:
+ result.append(i)
+
+ return result
+
+
+
+tokens = get_uniques(_tokens)
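+# Illustrative example: get_uniques(('A', 'B', 'A')) returns ['A', 'B'], so e.g.
+# DEFB appears only once in tokens even though 'defb', 'defm' and 'db' all map to it.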
+
+
+class Lexer(object):
+    ''' Lexer wrapper class, so that multiple lexer instances can coexist.
+    It is a thin wrapper around the PLY lexer for the current input.
+    '''
+ states = (
+ ('preproc', 'exclusive'),
+ )
+
+ # -------------- TOKEN ACTIONS --------------
+
+
+ def __set_lineno(self, value):
+ ''' Setter for lexer.lineno
+ '''
+ self.lex.lineno = value
+
+
+ def __get_lineno(self):
+ ''' Getter for lexer.lineno
+ '''
+ if self.lex is None:
+ return 0
+
+ return self.lex.lineno
+
+ lineno = property(__get_lineno, __set_lineno)
+
+
+ def t_INITIAL_preproc_skip(self, t):
+ r'[ \t]+'
+ pass # Ignore whitespaces and tabs
+
+
+ def t_CHAR(self, t):
+ r"'.'" # A single char
+
+ t.value = ord(t.value[1])
+ t.type = 'INTEGER'
+
+ return t
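+    # For example, the input "'A'" yields an INTEGER token with value 65.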
+
+
+ def t_HEXA(self, t):
+ r'([0-9][0-9a-fA-F]*[hH])|(\$[0-9a-fA-F]+)'
+
+ if t.value[0] == '$':
+ t.value = t.value[1:] # Remove initial '$'
+ else:
+ t.value = t.value[:-1] # Remove last 'h'
+
+ t.value = int(t.value, 16) # Convert to decimal
+ t.type = 'INTEGER'
+
+ return t
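+    # For example, '0Fh' and '$0F' both yield an INTEGER token with value 15.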
+
+
+ def t_BIN(self, t):
+        r'(%[01]+)|([01]+[bB])' # A binary integer
+        # Note: 00B is 0 in binary, but
+        # 00Bh is 11 (0x0B) in hex, so this pattern must come
+        # after HEXA
+
+ if t.value[0] == '%':
+ t.value = t.value[1:] # Remove initial %
+ else:
+ t.value = t.value[:-1] # Remove last 'b'
+
+ t.value = int(t.value, 2) # Convert to decimal
+ t.type = 'INTEGER'
+
+ return t
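+    # For example, '%1010' and '1010b' both yield an INTEGER token with value 10.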
+
+
+ def t_INITIAL_preproc_INTEGER(self, t):
+ r'[0-9]+' # an integer decimal number
+
+ t.value = int(t.value)
+
+ return t
+
+ def t_INITIAL_ID(self, t):
+ r'[_a-zA-Z.]([.]?[_a-zA-Z0-9\\@\#]+)*[:]?(\\\W)?' # Any identifier
+
+        tmp = t.value # Saves original value
+        # Local labels ('.name\@', optionally followed by ':') must be checked
+        # before plain labels, otherwise '.name\@:' would be classified as a LABEL.
+        if tmp[0] == "." and (tmp[-2:] == r"\@" or tmp[-3:] == r"\@:"):
+            t.type = "LOCALLABEL"
+            t.value = tmp[1:]
+            return t
+        if tmp[-1] == ':':
+            t.type = 'LABEL'
+            t.value = tmp[:-1]
+            return t
+
+        t.value = tmp.upper() # Convert to uppercase, since the internal tables use uppercase
+        key = tmp.lower() # Lowercase lookup key for the token tables
+
+        t.type = reserved_instructions.get(key)
+        if t.type is not None: return t
+
+        t.type = pseudo.get(key)
+        if t.type is not None: return t
+
+        t.type = regs8.get(key)
+        if t.type is not None: return t
+
+        t.type = flags.get(key)
+        if t.type is not None: return t
+
+        t.type = regs16.get(key, 'ID')
+ if t.type == 'ID':
+ t.value = tmp # Restores original value
+
+ return t
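+    # For example, 'ld' yields an LD token, 'hl' an HL token, and 'MyLabel:' a
+    # LABEL token with value 'MyLabel'; any other identifier is returned as a plain ID.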
+
+
+ def t_preproc_ID(self, t):
+ r'[_a-zA-Z][_a-zA-Z0-9]*' # preprocessor directives
+
+ t.type = preprocessor.get(t.value.lower(), 'ID')
+ return t
+
+
+ def t_COMMA(self, t):
+ r','
+
+ return t
+
+
+ def t_ADDR(self, t):
+ r'\$'
+
+ return t
+
+
+ def t_LP(self, t):
+ r'\('
+
+ return t
+
+
+ def t_RP(self, t):
+ r'\)'
+
+ return t
+
+
+ def t_RB(self, t):
+ r'\['
+
+ return t
+
+
+ def t_LB(self, t):
+ r'\]'
+ return t
+
+ def t_LSHIFT(self, t):
+ r'<<'
+ return t
+ def t_RSHIFT(self, t):
+ r'>>'
+ return t
+
+ def t_BITWISE_OR(self, t):
+ r'\|'
+ return t
+ def t_BITWISE_AND(self, t):
+ r'\&'
+ return t
+ def t_BITWISE_COMPLEMENT(self, t):
+ r'~'
+ return t
+ def t_LOGICAL_NOT(self, t):
+ r'\!'
+ return t
+
+ def t_PLUS(self, t):
+ r'\+'
+
+ return t
+
+
+ def t_MINUS(self, t):
+ r'\-'
+
+ return t
+
+
+ def t_MUL(self, t):
+ r'\*'
+
+ return t
+
+
+ def t_DIV(self, t):
+ r'\/'
+
+ return t
+
+
+ def t_POW(self, t):
+ r'\^'
+
+ return t
+
+
+ def t_APO(self, t):
+ r"'"
+
+ return t
+
+
+ def t_INITIAL_preproc_STRING(self, t):
+ r'"[^"]*"' # a doubled quoted string
+ t.value = t.value[1:-1] # Remove quotes
+
+ return t
+
+
+ def t_INITIAL_preproc_error(self, t):
+ ''' error handling rule
+ '''
+ self.error("illegal character '%s'" % t.value[0])
+
+
+ def t_INITIAL_preproc_CONTINUE(self, t):
+ r'\\\r?\n'
+ t.lexer.lineno += 1
+
+        # A trailing backslash continues the logical line onto the next physical line
+
+
+ def t_COMMENT(self, t):
+ r';.*'
+
+        # Skip the rest of the line; the newline itself is matched by t_INITIAL_preproc_NEWLINE
+
+
+ def t_INITIAL_preproc_NEWLINE(self, t):
+ r'\r?\n'
+
+ t.lexer.lineno += 1
+ t.lexer.begin('INITIAL')
+
+ return t
+
+
+ def t_INITIAL_SHARP(self, t):
+ r'\#'
+
+ if self.find_column(t) == 1:
+ t.lexer.begin('preproc')
+ else:
+ self.error("illegal character '%s'" % t.value[0])
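+    # For example, a line '#line 42' switches to the 'preproc' state and lexes
+    # as a _LINE token followed by INTEGER 42; the NEWLINE rule then switches
+    # back to the INITIAL state.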
+
+
+ def __init__(self):
+ ''' Creates a new GLOBAL lexer instance
+ '''
+ self.lex = None
+        self.filestack = [] # Current filename and line number being parsed
+ self.input_data = ''
+ self.tokens = tokens
+ self.next_token = None # if set to something, this will be returned once
+
+
+ def input(self, str):
+        ''' Defines the input string, replacing the current lexer.
+        '''
+ self.input_data = str
+ self.lex = lex.lex(object = self)
+ self.lex.input(self.input_data)
+
+
+ def token(self):
+ return self.lex.token()
+
+
+ def find_column(self, token):
+        ''' Computes the 1-based column of the given token within its line.
+        '''
+ i = token.lexpos
+ while i > 0:
+ if self.input_data[i - 1] == '\n': break
+ i -= 1
+
+ column = token.lexpos - i + 1
+
+ return column
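+    # For example, with input 'ld a, 1\nret' the token 'ret' has lexpos 8 and
+    # find_column returns 1.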
+
+
+ def msg(self, str):
+        ''' Prints a message prefixed with the current file name.
+        '''
+ #print '%s:%i %s' % (FILENAME, self.lex.lineno, str)
+ print '%s:%s %s' % (FILENAME, "?", str)
+
+
+ def error(self, str):
+ ''' Prints an error msg, and exits.
+ '''
+ self.msg('Error: %s' % str)
+
+ sys.exit(1)
+
+
+ def warning(self, str):
+        ''' Emits a warning and continues execution.
+ '''
+ self.msg('Warning: %s' % str)
+
+# Module-level lexer instance; needed so the lexer states are set up
+tmp = lex.lex(object = Lexer(), lextab = 'zxbasmlextab')
+
+if __name__ == '__main__':
+ FILENAME = sys.argv[1]
+ tmp.input(open(sys.argv[1]).read())
+ tok = tmp.token()
+ while tok:
+ print tok
+ tok = tmp.token()
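+
+# Example invocation (assuming an assembly source file 'main.asm' exists):
+#     python asmlex.py main.asm
+# This prints one LexToken per token; e.g. a file beginning with 'ld' would
+# print LexToken(LD,'LD',1,0) first.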