1 files changed, 1184 insertions, 0 deletions
diff --git a/mgbdis/mgbdis.py b/mgbdis/mgbdis.py
new file mode 100755
index 0000000..529126e
--- /dev/null
+++ b/mgbdis/mgbdis.py
@@ -0,0 +1,1184 @@
+#!/usr/bin/env python3
+
+"""Disassemble a Game Boy ROM into RGBDS compatible assembly code"""
+
+__author__ = 'Matt Currie and contributors'
+__credits__ = ['mattcurrie', 'kemenaran', 'bnzis']
+__version__ = '1.4'
+__copyright__ = 'Copyright 2018 by Matt Currie'
+__license__ = 'MIT'
+
+import argparse
+import glob
+import hashlib
+import os
+import png
+from shutil import copyfile
+
+from instruction_set import instructions, cb_instructions, instruction_variants
+
+default_symbols = [
+    '00:0000 RST_00',
+    '00:0000 .code:8',
+    '00:0008 RST_08',
+    '00:0008 .code:8',
+    '00:0010 RST_10',
+    '00:0010 .code:8',
+    '00:0018 RST_18',
+    '00:0018 .code:8',
+    '00:0020 RST_20',
+    '00:0020 .code:8',
+    '00:0028 RST_28',
+    '00:0028 .code:8',
+    '00:0030 RST_30',
+    '00:0030 .code:8',
+    '00:0038 RST_38',
+    '00:0038 .code:8',
+
+    '00:0040 VBlankInterrupt',
+    '00:0040 .code:8',
+    '00:0048 LCDCInterrupt',
+    '00:0048 .code:8',
+    '00:0050 TimerOverflowInterrupt',
+    '00:0050 .code:8',
+    '00:0058 SerialTransferCompleteInterrupt',
+    '00:0058 .code:8',
+    '00:0060 JoypadTransitionInterrupt',
+    '00:0060 .code:8',
+
+    '00:0100 Boot',
+    '00:0100 .code:4',
+    '00:0104 HeaderLogo',
+    '00:0104 .data:30',
+    '00:0134 HeaderTitle',
+    '00:0134 .text:10',
+    '00:0144 .data:c',
+    '00:0144 HeaderNewLicenseeCode',
+    '00:0146 HeaderSGBFlag',
+    '00:0147 HeaderCartridgeType',
+    '00:0148 HeaderROMSize',
+    '00:0149 HeaderRAMSize',
+    '00:014a HeaderDestinationCode',
+    '00:014b HeaderOldLicenseeCode',
+    '00:014c HeaderMaskROMVersion',
+    '00:014d HeaderComplementCheck',
+    '00:014e HeaderGlobalChecksum',
+]
+
+gbc_symbols = [
+    '00:0134 .text:b',
+    '00:013f HeaderManufacturerCode',
+    '00:013f .text:4',
+    '00:0143 HeaderCGBFlag',
+    '00:0143 .data:1'
+]
+
+hardware_labels = {
+    0xFF00: 'rP1',
+    0xFF01: 'rSB',
+    0xFF02: 'rSC',
+    0xFF04: 'rDIV',
+    0xFF05: 'rTIMA',
+    0xFF06: 'rTMA',
+    0xFF07: 'rTAC',
+    0xFF0F: 'rIF',
+    0xFF40: 'rLCDC',
+    0xFF41: 'rSTAT',
+    0xFF42: 'rSCY',
+    0xFF43: 'rSCX',
+    0xFF44: 'rLY',
+    0xFF45: 'rLYC',
+    0xFF46: 'rDMA',
+    0xFF47: 'rBGP',
+    0xFF48: 'rOBP0',
+    0xFF49: 'rOBP1',
+    0xFF4A: 'rWY',
+    0xFF4B: 'rWX',
+    0xFF4D: 'rKEY1',
+    0xFF4F: 'rVBK',
+    0xFF51: 'rHDMA1',
+    0xFF52: 'rHDMA2',
+    0xFF53: 'rHDMA3',
+    0xFF54: 'rHDMA4',
+    0xFF55: 'rHDMA5',
+    0xFF56: 'rRP',
+    0xFF68: 'rBCPS',
+    0xFF69: 'rBCPD',
+    0xFF6A: 'rOCPS',
+    0xFF6B: 'rOCPD',
+    0xFF70: 'rSVBK',
+    0xFFFF: 'rIE',
+    0xFF24: 'rNR50',
+    0xFF25: 'rNR51',
+    0xFF26: 'rNR52',
+    0xFF10: 'rNR10',
+    0xFF11: 'rNR11',
+    0xFF12: 'rNR12',
+    0xFF13: 'rNR13',
+    0xFF14: 'rNR14',
+    0xFF16: 'rNR21',
+    0xFF17: 'rNR22',
+    0xFF18: 'rNR23',
+    0xFF19: 'rNR24',
+    0xFF1A: 'rNR30',
+    0xFF1B: 'rNR31',
+    0xFF1C: 'rNR32',
+    0xFF1D: 'rNR33',
+    0xFF1E: 'rNR34',
+    0xFF20: 'rNR41',
+    0xFF21: 'rNR42',
+    0xFF22: 'rNR43',
+    0xFF23: 'rNR44',
+    0xFF76: 'rPCM12',
+    0xFF77: 'rPCM34',
+}
+
+ldh_a8_formatters = {
+    'ldh_a8': lambda value: '[{0}]'.format(hex_byte(value)),
+    'ld_ff00_a8': lambda value: '[{0}+{1}]'.format(hex_word(0xff00), hex_byte(value)),
+    'ldh_ffa8': lambda value: '[{0}]'.format(hex_word(0xff00 + value)),
+}
+
+def abort(message):
+    print(message)
+    os._exit(1)
+
+
+def hex_word(value):
+    return format_hex('${:04x}'.format(value))
+
+
+def hex_byte(value):
+    return format_hex('${:02x}'.format(value))
+
+
+def format_hex(hex_string):
+    if style['uppercase_hex']:
+        return hex_string.upper()
+    else:
+        return hex_string.lower()
+
+def bytes_to_string(data):
+    return ' '.join(hex_byte(byte) for byte in data)
+
+
+def rom_address_to_mem_address(address):
+    if address < 0x4000:
+        return address
+    else:
+        return ((address % 0x4000) + 0x4000)
+
+
+def to_signed(value):
+    if value > 127:
+        return (256 - value) * -1
+    return value
+
+def apply_style_to_instructions(style, instructions):
+    # set undefined opcodes to use db/DB
+    for opcode, instruction in instructions.items():
+        if instruction.startswith('db '):
+            instructions[opcode] = style['db'] + ' ' + hex_byte(opcode)
+
+    # set instruction variants
+    for variant_name, variants in instruction_variants.items():
+        for opcode, instruction in variants[style[variant_name]].items():
+            instructions[opcode] = instruction
+
+    return instructions
+
+
+class Bank:
+
+    def __init__(self, number, symbols, style):
+        self.style = style
+        self.bank_number = number
+        self.blocks = dict()
+        self.disassembled_addresses = set()
+        self.symbols = symbols
+
+        if number == 0:
+            self.memory_base_address = 0
+            self.rom_base_address = 0
+        else:
+            self.memory_base_address = 0x4000
+            self.rom_base_address = (number - 1) * 0x4000
+
+        self.target_addresses = dict({
+            'call': set(),
+            'jp': set(),
+            'jr': set()
+        })
+
+        self.instruction_label_prefixes = dict({
+            'call': 'Call',
+            'jp': 'Jump',
+            'jr': 'jr'
+        })
+
+        self.disassemble_block_range = dict({
+            'code': self.process_code_in_range,
+            'data': self.process_data_in_range,
+            'text': self.process_text_in_range,
+            'image': self.process_image_in_range
+        })
+
+
+    def add_target_address(self, instruction_name, address):
+        if address not in self.target_addresses[instruction_name]:
+            self.target_addresses[instruction_name].add(address)
+
+
+    def resolve_blocks(self):
+        blocks = self.symbols.get_blocks(self.bank_number)
+        block_start_addresses = sorted(blocks.keys())
+        resolved_blocks = dict()
+
+        for index in range(len(block_start_addresses)):
+
+            start_address = block_start_addresses[index]
+            block = blocks[start_address]
+            end_address = start_address + block['length']
+
+            # check if there is another block after this block
+            next_start_address = None
+            if index < len(block_start_addresses) - 1:
+                next_start_address = block_start_addresses[index + 1]
+
+                # if the next block starts before this one finishes, then adjust end address
+                if next_start_address < end_address:
+                    end_address = next_start_address
+
+            resolved_blocks[start_address] = {
+                'type': block['type'],
+                'length': end_address - start_address,
+                'arguments': block['arguments'],
+            }
+
+            if next_start_address is None and (end_address != self.memory_base_address + 0x4000):
+                # no more blocks and didn't finish at the end of the block, so finish up with a code block
+                resolved_blocks[end_address] = {
+                    'type': 'code',
+                    'length': (self.memory_base_address + 0x4000) - end_address,
+                    'arguments': None
+                }
+
+            if next_start_address is not None and end_address < next_start_address:
+                # we have another block, but there is a gap until the next block, so fill in the gap with a code block
+                resolved_blocks[end_address] = {
+                    'type': 'code',
+                    'length': next_start_address - end_address,
+                    'arguments': None
+                }
+
+        self.blocks = resolved_blocks
+
+    def get_label_for_instruction_operand(self, value):
+        # an operand value lower than $100 is more probably an actual value than an address:
+        # don't lookup symbols for it
+        if value <= 0x100:
+            return None
+
+        return self.symbols.get_label(self.bank_number, value)
+
+    def get_label_for_jump_target(self, instruction_name, address):
+        if self.bank_number == 0:
+            if address not in self.disassembled_addresses:
+                return None
+        else:
+            # TODO: if target address is in bank 0 then should check if that address
+            # has been disassembled in bank 0. requires access to bank 0 from
+            # other bank objects
+
+            is_in_switchable_bank = 0x4000 <= address < 0x8000
+            if is_in_switchable_bank and address not in self.disassembled_addresses:
+                return None
+
+        label = self.symbols.get_label(self.bank_number, address)
+        if label is not None:
+            # if the address has a specific label then just use that
+            return label
+
+        if address in self.target_addresses[instruction_name]:
+            return self.format_label(instruction_name, address)
+
+        return None
+
+
+    def get_labels_for_non_code_address(self, address):
+        labels = list()
+
+        label = self.symbols.get_label(self.bank_number, address)
+        if label is not None:
+            is_local = label.startswith('.')
+            if is_local:
+                labels.append(label + ':')
+            else:
+                labels.append(label + '::')
+
+        return labels
+
+
+    def get_labels_for_address(self, address):
+        labels = list()
+
+        label = self.symbols.get_label(self.bank_number, address)
+        if label is not None:
+            # if the address has a specific label then just use that
+            is_local = label.startswith('.')
+            if is_local:
+                labels.append(label + ':')
+            else:
+                labels.append(label + '::')
+        else:
+            # otherwise, if the address was marked as a target address, generate a label
+            for instruction_name in ['call', 'jp', 'jr']:
+                if address in self.target_addresses[instruction_name]:
+                    labels.append(self.format_label(instruction_name, address) + ':')
+
+        return labels
+
+
+    def format_label(self, instruction_name, address):
+        formatted_bank = format_hex('{:03x}'.format(self.bank_number))
+        formatted_address = format_hex('{:04x}'.format(address))
+        return '{0}_{1}_{2}'.format(self.instruction_label_prefixes[instruction_name], formatted_bank, formatted_address)
+
+
+    def format_image_label(self, address):
+        return 'image_{0:03x}_{1:04x}'.format(self.bank_number, address)
+
+
+    def format_instruction(self, instruction_name, operands, address = None, source_bytes = None):
+        instruction = '{indentation}{instruction_name:<{operand_padding}} {operands}'.format(
+            indentation=self.style['indentation'],
+            instruction_name=instruction_name,
+            operand_padding=self.style['operand_padding'],
+            operands=', '.join(operands)
+        )
+
+        if self.style['print_hex'] and address is not None and source_bytes is not None:
+            return '{0:<50}; {1}: {2}'.format(instruction, hex_word(address), bytes_to_string(source_bytes))
+        else:
+            return '{0}'.format(instruction.rstrip())
+
+
+    def format_data(self, data):
+        return self.format_instruction(self.style['db'], data)
+
+
+    def append_output(self, text):
+        self.output.append(text)
+
+
+    def append_labels_to_output(self, labels):
+        self.append_empty_line_if_none_already()
+        self.append_output('\n'.join(labels))
+
+
+    def append_empty_line_if_none_already(self):
+        if len(self.output) > 0 and self.output[len(self.output) - 1] != '':
+            self.append_output('')
+
+
+    def disassemble(self, rom, first_pass = False):
+        self.first_pass = first_pass
+
+        if first_pass:
+            self.resolve_blocks()
+
+        self.output = list()
+
+        if self.bank_number == 0:
+            self.append_output('SECTION "ROM Bank ${0:03x}", ROM0[$0]'.format(self.bank_number))
+        else:
+            self.append_output('SECTION "ROM Bank ${0:03x}", ROMX[$4000], BANK[${0:x}]'.format(self.bank_number))
+        self.append_output('')
+
+        block_start_addresses = sorted(self.blocks.keys())
+
+        for index in range(len(block_start_addresses)):
+            start_address = block_start_addresses[index]
+            block = self.blocks[start_address]
+            end_address = start_address + block['length']
+            self.disassemble_block_range[block['type']](rom, self.rom_base_address + start_address, self.rom_base_address + end_address, block['arguments'])
+            self.append_empty_line_if_none_already()
+
+        return '\n'.join(self.output)
+
+
+    def process_code_in_range(self, rom, start_address, end_address, arguments = None):
+        if not self.first_pass and debug:
+            print('Disassembling code in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))
+
+        self.pc = start_address
+        while self.pc < end_address:
+            instruction = self.disassemble_at_pc(rom, end_address)
+
+
+    def disassemble_at_pc(self, rom, end_address):
+        pc = self.pc
+        pc_mem_address = rom_address_to_mem_address(pc)
+        length = 1
+        opcode = rom.data[pc]
+        comment = None
+        operands = None
+        operand_values = list()
+
+        if opcode not in instructions:
+            abort('Unhandled opcode: {} at {}'.format(hex_byte(opcode), hex_word(pc)))
+
+        if opcode == 0xCB:
+            cb_opcode = rom.data[pc + 1]
+            length += 1
+
+            instruction_name = rom.cb_instruction_name[cb_opcode]
+            operands = rom.cb_instruction_operands[cb_opcode]
+        else:
+            instruction_name = rom.instruction_names[opcode]
+            operands = rom.instruction_operands[opcode]
+
+        if instruction_name == 'stop' or (instruction_name == 'halt' and not self.style['disable_halt_nops']):
+            if rom.data[pc + 1] == 0x00:
+                # rgbds adds a nop instruction after a stop/halt, so if that instruction
+                # exists then we can insert it as a stop/halt command with length 2
+                length += 1
+            else:
+                # otherwise handle it as a data byte
+                instruction_name = self.style['db']
+                operands = [hex_byte(opcode)]
+
+
+        # figure out the operand values for each operand
+        for operand in operands:
+            value = None
+
+            if operand == 'a16':
+                length += 2
+                value = rom.data[pc + 1] + rom.data[pc + 2] * 256
+                operand_values.append(hex_word(value))
+
+            elif operand == '[a16]':
+                length += 2
+                value = rom.data[pc + 1] + rom.data[pc + 2] * 256
+                label = self.get_label_for_instruction_operand(value)
+                if label:
+                    operand_values.append('[' + label + ']')
+                else:
+                    operand_values.append('[' + hex_word(value) + ']')
+
+                # rgbds converts "ld [$ff40],a" into "ld [$ff00+40],a" automatically,
+                # so use a macro to encode it as data to ensure exact binary reproduction of the rom
+                if not self.style['disable_auto_ldh']:
+                    if value >= 0xff00 and (opcode == 0xea or opcode == 0xfa):
+                        rom.has_ld_long = True
+
+                        # use ld_long macro
+                        instruction_name = 'ld_long'
+
+                        # cannot wrap the address value with square brackets
+                        operand_values.pop()
+                        operand_values.append(hex_word(value))
+
+            elif operand == '[$ff00+a8]' or operand == '[a8]' or operand == '[$ffa8]':
+                length += 1
+                value = rom.data[pc + 1]
+                full_value = 0xff00 + value
+                label = self.get_label_for_instruction_operand(full_value)
+                if label is not None:
+                    # when referencing a label, we need to explicitely tell rgbds to use the short load opcode
+                    instruction_name = 'ldh'
+                    operand_values.append('[{}]'.format(label))
+                elif full_value in hardware_labels:
+                    operand_values.append('[{}]'.format(hardware_labels[full_value]))
+                else:
+                    # use one of the ldh_a8_formatters formatters
+                    operand_values.append(ldh_a8_formatters[self.style['ldh_a8']](value))
+
+            elif operand == 'd8':
+                length += 1
+                value = rom.data[pc + 1]
+                operand_values.append(hex_byte(value))
+
+            elif operand == 'd16':
+                length += 2
+                value = rom.data[pc + 1] + rom.data[pc + 2] * 256
+                label = self.get_label_for_instruction_operand(value)
+                if label is not None:
+                    operand_values.append(label)
+                else:
+                    operand_values.append(hex_word(value))
+
+            elif operand == 'r8':
+                length += 1
+                value = to_signed(rom.data[pc + 1])
+                if value < 0:
+                    operand_values.append('-' + hex_byte(abs(value)))
+                else:
+                    operand_values.append(hex_byte(value))
+
+            elif operand == 'pc+r8':
+                length += 1
+                value = to_signed(rom.data[pc + 1])
+
+                # calculate the absolute address for the jump
+                value = pc + 2 + value
+
+                relative_value = value - pc
+                if relative_value >= 0:
+                    operand_values.append('@+' + hex_byte(relative_value))
+                else:
+                    operand_values.append('@-' + hex_byte(relative_value * -1))
+
+                target_bank = value // 0x4000
+
+                # convert to banked value so it can be used as a label
+                value = rom_address_to_mem_address(value)
+
+                if self.bank_number != target_bank:
+                    # don't use labels for relative jumps across banks
+                    value = None
+
+                if target_bank < self.bank_number:
+                    # output as data, otherwise RGBDS will complain
+                    instruction_name = self.style['db']
+                    operand_values = [hex_byte(opcode), hex_byte(rom.data[pc + 1])]
+
+                    # exit the loop to avoid processing the operands any further
+                    break
+
+            elif operand == 'sp+r8':
+                length += 1
+                value = to_signed(rom.data[pc + 1])
+
+                if value < 0:
+                    operand_values.append('sp-' + hex_byte(abs(value)))
+                else:
+                    operand_values.append('sp+' + hex_byte(value))
+
+            elif operand == '[$ff00+c]':
+                operand_values.append('[{0}+c]'.format(hex_word(0xff00)))
+
+            elif type(operand) is str:
+                operand_values.append(operand)
+
+            else:
+                operand_values.append(hex_byte(operand))
+
+
+            if instruction_name in ['jr', 'jp', 'call'] and value is not None and value < 0x8000:
+                mem_address = rom_address_to_mem_address(value)
+
+                if self.first_pass:
+                    # dont allow switched banks to create labels in bank 0
+                    is_address_in_current_bank = (mem_address < 0x4000 and self.bank_number == 0) or (mem_address >= 0x4000 and self.bank_number > 0)
+                    if is_address_in_current_bank:
+                        # add the label
+                        self.add_target_address(instruction_name, mem_address)
+                else:
+                    # fetch the label name
+                    label = self.get_label_for_jump_target(instruction_name, mem_address)
+                    if label is not None:
+                        # remove the address from operand values and use the label instead
+                        operand_values.pop()
+                        operand_values.append(label)
+
+
+        # check the instruction is not spanning 2 banks
+        if pc + length - 1 >= end_address:
+            # must handle it as data
+            length = 1
+            instruction_name = self.style['db']
+            operand_values = [hex_byte(opcode)]
+
+        self.pc += length
+
+        if self.first_pass:
+            self.disassembled_addresses.add(pc_mem_address)
+        else:
+            labels = self.get_labels_for_address(pc_mem_address)
+            if len(labels):
+                self.append_labels_to_output(labels)
+
+            if comment is not None:
+                self.append_output(comment)
+
+            instruction_bytes = rom.data[pc:pc + length]
+            self.append_output(self.format_instruction(instruction_name, operand_values, pc_mem_address, instruction_bytes))
+
+            # add some empty lines after returns and jumps to break up the code blocks
+            if instruction_name in ['ret', 'reti', 'jr', 'jp']:
+                if (
+                    instruction_name == 'jr' or
+                    (instruction_name == 'jp' and len(operand_values) > 1) or
+                    (instruction_name == 'ret' and len(operand_values) > 0)
+                ):
+                    # conditional or jr
+                    self.append_output('')
+                else:
+                    # always executes
+                    self.append_output('')
+                    self.append_output('')
+
+
+    def process_data_in_range(self, rom, start_address, end_address, arguments = None):
+        if not self.first_pass and debug:
+            print('Outputting data in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))
+
+        values = list()
+
+        for address in range(start_address, end_address):
+            mem_address = rom_address_to_mem_address(address)
+
+            labels = self.get_labels_for_non_code_address(mem_address)
+            if len(labels):
+                # add any existing values to the output and reset the list
+                if len(values) > 0:
+                    self.append_output(self.format_data(values))
+                    values = list()
+
+                self.append_labels_to_output(labels)
+
+            values.append(hex_byte(rom.data[address]))
+
+            # output max of 16 bytes per line, and ensure any remaining values are output
+            if len(values) == 16 or (address == end_address - 1 and len(values)):
+                self.append_output(self.format_data(values))
+                values = list()
+
+
+    def process_text_in_range(self, rom, start_address, end_address, arguments = None):
+        if not self.first_pass and debug:
+            print('Outputting text in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))
+
+        values = list()
+        text = ''
+
+        for address in range(start_address, end_address):
+            mem_address = rom_address_to_mem_address(address)
+
+            labels = self.get_labels_for_non_code_address(mem_address)
+            if len(labels):
+                # add any existing values to the output and reset the list
+                if len(text):
+                    values.append('"{}"'.format(text))
+                    text = ''
+
+                if len(values):
+                    self.append_output(self.format_data(values))
+                    values = list()
+
+                self.append_labels_to_output(labels)
+
+            byte = rom.data[address]
+            if byte >= 0x20 and byte < 0x7F:
+                text += chr(byte)
+            else:
+                if len(text):
+                    values.append('"{}"'.format(text))
+                    text = ''
+                values.append(hex_byte(byte))
+
+        if len(text):
+            values.append('"{}"'.format(text))
+
+        if len(values):
+            self.append_output(self.format_data(values))
+
+    def process_image_in_range(self, rom, start_address, end_address, arguments = None):
+        if not self.first_pass and debug:
+            print('Outputting image in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))
+
+        if self.first_pass:
+            return
+
+        mem_address = rom_address_to_mem_address(start_address)
+        labels = self.get_labels_for_non_code_address(mem_address)
+        if len(labels):
+            self.append_labels_to_output(labels)
+            basename = labels[0].rstrip(':')
+        else:
+            basename = self.format_image_label(mem_address)
+
+        full_filename = rom.write_image(basename, arguments, rom.data[start_address:end_address])
+        self.append_output(self.format_instruction('INCBIN', ['\"' + full_filename + '\"']))
+
+
+
+
+class Symbols:
+    def __init__(self):
+        self.symbols = dict()
+        self.blocks = dict()
+
+    def load_sym_file(self, symbols_path):
+        f = open(symbols_path, 'r')
+
+        for line in f:
+            # ignore comments and empty lines
+            if line[0] != ';' and len(line.strip()):
+                self.add_symbol_definition(line)
+
+        f.close()
+
+
+    def add_symbol_definition(self, symbol_def):
+        try:
+            location, label = symbol_def.split()
+            bank, address = location.split(':')
+            bank = int(bank, 16)
+            address = int(address, 16)
+        except:
+            print("Ignored invalid symbol definition: {}\n".format(symbol_def))
+        else:
+            label_parts = label.split(':')
+            is_block_definition = label[0] == '.' and len(label_parts) >= 2
+
+            if is_block_definition:
+                # add a block
+                block_type = label_parts[0].lower()
+                data_length = int(label_parts[1], 16)
+
+                if block_type in ['.byt', '.data']:
+                    block_type = 'data'
+
+                elif block_type in ['.asc', '.text']:
+                    block_type = 'text'
+
+                elif block_type in ['.code']:
+                    block_type = 'code'
+
+                elif block_type in ['.image']:
+                    block_type = 'image'
+
+                else:
+                    return
+
+                if len(label_parts) == 3:
+                    arguments = label_parts[2]
+                else:
+                    arguments = None
+
+                self.add_block(bank, address, block_type, data_length, arguments)
+
+            else:
+                # add the label
+                self.add_label(bank, address, label)
+
+    def add_block(self, bank, address, block_type, length, arguments = None):
+        memory_base_address = 0x0000 if bank == 0 else 0x4000
+
+        if address >= memory_base_address:
+            blocks = self.get_blocks(bank)
+            blocks[address] = {
+                'type': block_type,
+                'length': length,
+                'arguments': arguments
+            }
+
+    def add_label(self, bank, address, label):
+        if bank not in self.symbols:
+            self.symbols[bank] = dict()
+
+        is_symbol_banked = 0x4000 <= address < 0x8000
+        if is_symbol_banked:
+            self.symbols[bank][address] = label
+        else:
+            self.symbols[0][address] = label
+
+    def get_label(self, bank, address):
+        # attempt to find a banked symbol
+        is_symbol_banked = 0x4000 <= address < 0x8000
+        if is_symbol_banked and bank in self.symbols and address in self.symbols[bank]:
+            return self.symbols[bank][address]
+
+        # attempt to find a symbol in non-banked space (stored as bank 0)
+        if 0 in self.symbols and address in self.symbols[0]:
+            return self.symbols[0][address]
+
+        return None
+
+    def get_blocks(self, bank):
+        memory_base_address = 0x0000 if bank == 0 else 0x4000
+
+        if bank not in self.blocks:
+            self.blocks[bank] = dict()
+            # each bank defaults to having a single code block
+            self.add_block(bank, memory_base_address, 'code', 0x4000)
+
+        return self.blocks[bank]
+
+class ROM:
+
+    def __init__(self, rom_path, style):
+        self.style = style
+        self.script_dir = os.path.dirname(os.path.realpath(__file__))
+        self.rom_path = rom_path
+        self.load()
+        self.split_instructions()
+        self.has_ld_long = False
+
+        self.image_output_directory = 'gfx'
+        self.image_dependencies = []
+
+        print('ROM MD5 hash:', hashlib.md5(self.data).hexdigest())
+
+        self.symbols = self.load_symbols()
+
+        # add some bytes to avoid an index out of range error
+        # when processing last few instructions in the rom
+        self.data += b'\x00\x00'
+
+        self.banks = dict()
+        for bank in range(0, self.num_banks):
+            self.banks[bank] = Bank(bank, self.symbols, style)
+
+    def load(self):
+        if os.path.isfile(self.rom_path):
+            print('Loading "{}"...'.format(self.rom_path))
+            self.data = open(self.rom_path, 'rb').read()
+            self.rom_size = len(self.data)
+            self.num_banks = self.rom_size // 0x4000
+        else:
+            abort('"{}" not found'.format(self.rom_path))
+
+
+    def split_instructions(self):
+        # split the instructions and operands
+        self.instruction_names = dict()
+        self.instruction_operands = dict()
+        self.cb_instruction_name = dict()
+        self.cb_instruction_operands = dict()
+
+        for opcode in instructions:
+            instruction_parts = instructions[opcode].split()
+            self.instruction_names[opcode] = instruction_parts[0]
+            if len(instruction_parts) > 1:
+                self.instruction_operands[opcode] = instruction_parts[1].split(',')
+            else:
+                self.instruction_operands[opcode] = list()
+
+        for cb_opcode in cb_instructions:
+            instruction_parts = cb_instructions[cb_opcode].split()
+            self.cb_instruction_name[cb_opcode] = instruction_parts[0]
+            if len(instruction_parts) > 1:
+                self.cb_instruction_operands[cb_opcode] = instruction_parts[1].split(',')
+            else:
+                self.cb_instruction_operands[cb_opcode] = list()
+
+
+    def load_symbols(self):
+        symbols = Symbols()
+
+        for symbol_def in default_symbols:
+            symbols.add_symbol_definition(symbol_def)
+
+        if self.supports_gbc():
+            for symbol_def in gbc_symbols:
+                symbols.add_symbol_definition(symbol_def)
+
+        symbols_path = os.path.splitext(self.rom_path)[0] + '.sym'
+        if os.path.isfile(symbols_path):
+            print('Processing symbol file "{}"...'.format(symbols_path))
+            symbols.load_sym_file(symbols_path)
+
+        return symbols
+
+
+    def supports_gbc(self):
+        return ((self.data[0x143] & 0x80) == 0x80)
+
+
+    def disassemble(self, output_dir):
+
+        self.output_directory = os.path.abspath(output_dir.rstrip(os.sep))
+
+        if os.path.exists(self.output_directory):
+            if not args.overwrite:
+                abort('Output directory "{}" already exists!'.format(self.output_directory))
+
+            if not os.path.isdir(self.output_directory):
+                abort('Output path "{}" already exists and is not a directory!'.format(self.output_directory))
+        else:
+            os.makedirs(self.output_directory)
+
+
+        print('Generating labels...')
+        self.generate_labels()
+
+        self.image_dependencies = []
+
+        print('Generating disassembly', end='')
+        if debug:
+            print('')
+
+        for bank in range(0, self.num_banks):
+            self.write_bank_asm(bank)
+
+        self.copy_hardware_inc()
+        self.write_game_asm()
+        self.write_makefile()
+
+        print('\nDisassembly generated in "{}"'.format(self.output_directory))
+
+
+    def generate_labels(self):
+        for bank in range(0, self.num_banks):
+            self.banks[bank].disassemble(rom, True)
+
+
+    def write_bank_asm(self, bank):
+        if not debug:
+            # progress indicator
+            print('.', end='', flush=True)
+
+        path = os.path.join(self.output_directory, 'bank_{0:03x}.asm'.format(bank))
+        f = open(path, 'w')
+
+        self.write_header(f)
+        f.write(self.banks[bank].disassemble(rom))
+
+        f.close()
+
+
+    def write_header(self, f):
+        f.write('; Disassembly of "{}"\n'.format(os.path.basename(self.rom_path)))
+        f.write('; This file was created with:\n')
+        f.write('; {}\n'.format(app_name))
+        f.write('; https://github.com/mattcurrie/mgbdis\n\n')
+
+
+    def copy_hardware_inc(self):
+        src = os.path.join(self.script_dir, 'hardware.inc')
+        dest = os.path.join(self.output_directory, 'hardware.inc')
+        copyfile(src, dest)
+
+
+    def write_game_asm(self):
+        path = os.path.join(self.output_directory, 'game.asm')
+        f = open(path, 'w')
+
+        self.write_header(f)
+
+        if self.has_ld_long:
+
+            f.write(
+"""ld_long: MACRO
+    IF STRLWR("\\1") == "a"
+        ; ld a, [$ff40]
+        db $FA
+        dw \\2
+    ELSE
+        IF STRLWR("\\2") == "a"
+            ; ld [$ff40], a
+            db $EA
+            dw \\1
+        ENDC
+    ENDC
+ENDM
+
+""")
+
+        f.write('INCLUDE "hardware.inc"')
+        for bank in range(0, self.num_banks):
+            f.write('\nINCLUDE "bank_{0:03x}.asm"'.format(bank))
+        f.close()
+
+
+    def write_image(self, basename, arguments, data):
+
+        # defaults
+        width = 128
+        palette = 0xe4
+        bpp = 2
+
+        # process arguments
+        if arguments is not None:
+            for argument in arguments.split(','):
+                if len(argument) > 1:
+                    if argument[0] == 'w':
+                        # width is in decimal
+                        width = int(argument[1:], 10)
+
+                    elif argument[0] == 'p':
+                        palette = int(argument[1:], 16)
+
+                    elif argument == '1bpp':
+                        bpp = 1
+
+        image_output_path = os.path.join(self.output_directory, self.image_output_directory)
+        if os.path.exists(image_output_path):
+            if not os.path.isdir(image_output_path):
+                abort('File already exists named "{}". Cannot store images!'.format(image_output_path))
+        else:
+            os.makedirs(image_output_path)
+
+        relative_path = os.path.join(self.image_output_directory, basename + '.' + "{}bpp".format(bpp))
+        self.image_dependencies.append(relative_path)
+        path = os.path.join(self.output_directory, self.image_output_directory, basename + '.png')
+
+        bytes_per_tile_row = bpp  # 8 pixels at 1 or 2 bits per pixel
+        bytes_per_tile = bytes_per_tile_row * 8  # 8 rows per tile
+
+        num_tiles = len(data) // bytes_per_tile
+        tiles_per_row = width // 8
+
+        # if we have fewer tiles than the number of tiles per row, or if an odd number of tiles
+        if (num_tiles < tiles_per_row) or (num_tiles & 1):
+            # then just make a single row of tiles
+            tiles_per_row = num_tiles
+            width = num_tiles * 8
+
+        tile_rows = (num_tiles / tiles_per_row)
+        if not tile_rows.is_integer():
+            abort('Invalid length ${:0x} or width {} for image block: {}'.format(len(data), width, basename))
+
+        height = int(tile_rows) * 8
+
+        pixel_data = self.convert_to_pixel_data(data, width, height, bpp)
+        rgb_palette = self.convert_palette_to_rgb(palette, bpp)
+
+        f = open(path, 'wb')
+        w = png.Writer(width, height, alpha=False, bitdepth=2, palette=rgb_palette)
+        w.write(f, pixel_data)
+        f.close()
+
+        return relative_path
+
+
+    def convert_to_pixel_data(self, data, width, height, bpp):
+        result = []
+        for y in range(0, height):
+            row = []
+            for x in range(0, width):
+                offset = self.coordinate_to_tile_offset(x, y, width, bpp)
+
+                if offset < len(data):
+                    # extract the color from the one or two bytes of tile data at the offset
+                    shift = (7 - (x & 7))
+                    mask = (1 << shift)
+                    if bpp == 2:
+                        color = ((data[offset] & mask) >> shift) + (((data[offset + 1] & mask) >> shift) << 1)
+                    else:
+                        color = ((data[offset] & mask) >> shift)
+                else:
+                    color = 0
+
+                row.append(color)
+            result.append(row)
+
+        return result
+
+
+    def coordinate_to_tile_offset(self, x, y, width, bpp):
+        bytes_per_tile_row = bpp  # 8 pixels at 1 or 2 bits per pixel
+        bytes_per_tile = bytes_per_tile_row * 8  # 8 rows per tile
+        tiles_per_row = width // 8
+
+        tile_y = y // 8
+        tile_x = x // 8
+        row_of_tile = y & 7
+
+        return (tile_y * tiles_per_row * bytes_per_tile) + (tile_x * bytes_per_tile) + (row_of_tile * bytes_per_tile_row)
+
+
+    def convert_palette_to_rgb(self, palette, bpp):
+        col0 = 255 - (((palette & 0x03)     ) << 6)
+        col1 = 255 - (((palette & 0x0C) >> 2) << 6)
+        col2 = 255 - (((palette & 0x30) >> 4) << 6)
+        col3 = 255 - (((palette & 0xC0) >> 6) << 6)
+        if bpp == 2:
+            return [
+                (col0, col0, col0),
+                (col1, col1, col1),
+                (col2, col2, col2),
+                (col3, col3, col3)
+            ]
+        else:
+            return [
+                (col0, col0, col0),
+                (col3, col3, col3)
+            ]
+
+
+    def write_makefile(self):
+        rom_extension = 'gb'
+        if self.supports_gbc():
+            rom_extension = 'gbc'
+
+        path = os.path.join(self.output_directory, 'Makefile')
+        f = open(path, 'w')
+
+        if len(self.image_dependencies):
+            f.write('IMAGE_DEPS = {}\n\n'.format(' '.join(self.image_dependencies)))
+
+        f.write('all: game.{}\n\n'.format(rom_extension))
+
+        f.write('%.2bpp: %.png\n')
+        f.write('\trgbgfx -o $@ $<\n\n')
+
+        f.write('%.1bpp: %.png\n')
+        f.write('\trgbgfx -d 1 -o $@ $<\n\n')
+
+        if len(self.image_dependencies):
+            f.write('game.o: game.asm bank_*.asm $(IMAGE_DEPS)\n')
+        else:
+            f.write('game.o: game.asm bank_*.asm\n')
+
+        parameters = []
+        if self.style['disable_halt_nops']:
+            parameters.append('-h')
+        if self.style['disable_auto_ldh']:
+            parameters.append('-L')
+        f.write('\trgbasm {} -o game.o game.asm\n\n'.format(' '.join(parameters)))
+
+        f.write('game.{}: game.o\n'.format(rom_extension))
+        f.write('\trgblink -n game.sym -m game.map -o $@ $<\n')
+        f.write('\trgbfix -v -p 255 $@\n\n')
+        f.write('\tmd5 $@\n\n')
+
+        f.write('clean:\n')
+        f.write('\trm -f game.o game.{} game.sym game.map\n'.format(rom_extension))
+        f.write('\tfind . \\( -iname \'*.1bpp\' -o -iname \'*.2bpp\' \\) -exec rm {} +')
+
+        f.close()
+
+
+
+app_name = 'mgbdis v{version} - Game Boy ROM disassembler by {author}.'.format(version=__version__, author=__author__)
+parser = argparse.ArgumentParser(description=app_name)
+parser.add_argument('rom_path', help='Game Boy (Color) ROM file to disassemble')
+parser.add_argument('--output-dir', default='disassembly', help='Directory to write the files into. Defaults to "disassembly"', action='store')
+parser.add_argument('--uppercase-hex', help='Print hexadecimal numbers using uppercase characters', action='store_true')
+parser.add_argument('--print-hex', help='Print the hexadecimal representation next to the opcodes', action='store_true')
+parser.add_argument('--align-operands', help='Format the instruction operands to align them vertically', action='store_true')
+parser.add_argument('--indent-spaces', help='Number of spaces to use to indent instructions', type=int, default=4)
+parser.add_argument('--indent-tabs', help='Use tabs for indenting instructions', action='store_true')
+parser.add_argument('--uppercase-db', help='Use uppercase for DB data declarations', action='store_true')
+parser.add_argument('--hli', help='Mnemonic to use for \'ld [hl+], a\' type instructions.', type=str, default='hl+', choices=['hl+', 'hli', 'ldi'])
+parser.add_argument('--ldh_a8', help='Mnemonic to use for \'ldh [a8], a\' type instructions.', type=str, default='ldh_a8', choices=['ldh_a8', 'ldh_ffa8', 'ld_ff00_a8'])
+parser.add_argument('--ld_c', help='Mnemonic to use for \'ld [c], a\' type instructions.', type=str, default='ld_c', choices=['ld_c', 'ldh_c', 'ld_ff00_c'])
+parser.add_argument('--disable-halt-nops', help='Disable RGBDS\'s automatic insertion of \'nop\' instructions after \'halt\' instructions.', action='store_true')
+parser.add_argument('--disable-auto-ldh', help='Disable RGBDS\'s automatic optimisation of \'ld [$ff00+a8], a\' to \'ldh [a8], a\' instructions. Requires RGBDS >= v0.3.7', action='store_true')
+parser.add_argument('--overwrite', help='Allow generating a disassembly into an already existing directory', action='store_true')
+parser.add_argument('--debug', help='Display debug output', action='store_true')
+args = parser.parse_args()
+
+debug = args.debug
+
+style = {
+    'uppercase_hex': args.uppercase_hex,
+    'print_hex': args.print_hex,
+    'indentation': '\t' if args.indent_tabs else ' ' * args.indent_spaces,
+    'operand_padding': 4 if args.align_operands else 0,
+    'db': 'DB' if args.uppercase_db else 'db',
+    'hli': args.hli,
+    'ldh_a8': args.ldh_a8,
+    'ld_c': args.ld_c,
+    'disable_halt_nops': args.disable_halt_nops,
+    'disable_auto_ldh': args.disable_auto_ldh,
+}
+instructions = apply_style_to_instructions(style, instructions)
+
+rom = ROM(args.rom_path, style)
+rom.disassemble(args.output_dir)