summary | refs | log | tree | commit | diff
path: root/mgbdis/mgbdis.py
diff options
context:
space:
mode:
Diffstat (limited to 'mgbdis/mgbdis.py')
-rwxr-xr-x mgbdis/mgbdis.py 1184
1 files changed, 1184 insertions, 0 deletions
diff --git a/mgbdis/mgbdis.py b/mgbdis/mgbdis.py
new file mode 100755
index 0000000..529126e
--- /dev/null
+++ b/mgbdis/mgbdis.py
@@ -0,0 +1,1184 @@
#!/usr/bin/env python3

"""Disassemble a Game Boy ROM into RGBDS compatible assembly code"""

__author__ = 'Matt Currie and contributors'
__credits__ = ['mattcurrie', 'kemenaran', 'bnzis']
__version__ = '1.4'
__copyright__ = 'Copyright 2018 by Matt Currie'
__license__ = 'MIT'

import argparse
import glob
import hashlib
import os
from shutil import copyfile

import png

from instruction_set import instructions, cb_instructions, instruction_variants

# Symbol definitions applied to every ROM before any .sym file is loaded.
# Each entry uses the "bank:address name" form parsed by
# Symbols.add_symbol_definition; names of the form ".type:length" declare a
# block of that type and (hexadecimal) length instead of a label.
default_symbols = [
    # restart vectors
    '00:0000 RST_00',
    '00:0000 .code:8',
    '00:0008 RST_08',
    '00:0008 .code:8',
    '00:0010 RST_10',
    '00:0010 .code:8',
    '00:0018 RST_18',
    '00:0018 .code:8',
    '00:0020 RST_20',
    '00:0020 .code:8',
    '00:0028 RST_28',
    '00:0028 .code:8',
    '00:0030 RST_30',
    '00:0030 .code:8',
    '00:0038 RST_38',
    '00:0038 .code:8',

    # interrupt handlers
    '00:0040 VBlankInterrupt',
    '00:0040 .code:8',
    '00:0048 LCDCInterrupt',
    '00:0048 .code:8',
    '00:0050 TimerOverflowInterrupt',
    '00:0050 .code:8',
    '00:0058 SerialTransferCompleteInterrupt',
    '00:0058 .code:8',
    '00:0060 JoypadTransitionInterrupt',
    '00:0060 .code:8',

    # entry point and cartridge header
    '00:0100 Boot',
    '00:0100 .code:4',
    '00:0104 HeaderLogo',
    '00:0104 .data:30',
    '00:0134 HeaderTitle',
    '00:0134 .text:10',
    '00:0144 .data:c',
    '00:0144 HeaderNewLicenseeCode',
    '00:0146 HeaderSGBFlag',
    '00:0147 HeaderCartridgeType',
    '00:0148 HeaderROMSize',
    '00:0149 HeaderRAMSize',
    '00:014a HeaderDestinationCode',
    '00:014b HeaderOldLicenseeCode',
    '00:014c HeaderMaskROMVersion',
    '00:014d HeaderComplementCheck',
    '00:014e HeaderGlobalChecksum',
]

# Extra symbol definitions applied (after default_symbols) when
# ROM.supports_gbc() is true.  The '.text:b' entry replaces the '.text:10'
# block that default_symbols declares at the same address.
gbc_symbols = [
    '00:0134 .text:b',
    '00:013f HeaderManufacturerCode',
    '00:013f .text:4',
    '00:0143 HeaderCGBFlag',
    '00:0143 .data:1',
]

# Names substituted for $FFxx addresses used as operands of the short
# "ldh"-style loads (see Bank.disassemble_at_pc).  The generated game.asm
# includes hardware.inc, which is expected to define these names.
hardware_labels = {
    0xFF00: 'rP1',
    0xFF01: 'rSB',
    0xFF02: 'rSC',
    0xFF04: 'rDIV',
    0xFF05: 'rTIMA',
    0xFF06: 'rTMA',
    0xFF07: 'rTAC',
    0xFF0F: 'rIF',
    0xFF40: 'rLCDC',
    0xFF41: 'rSTAT',
    0xFF42: 'rSCY',
    0xFF43: 'rSCX',
    0xFF44: 'rLY',
    0xFF45: 'rLYC',
    0xFF46: 'rDMA',
    0xFF47: 'rBGP',
    0xFF48: 'rOBP0',
    0xFF49: 'rOBP1',
    0xFF4A: 'rWY',
    0xFF4B: 'rWX',
    0xFF4D: 'rKEY1',
    0xFF4F: 'rVBK',
    0xFF51: 'rHDMA1',
    0xFF52: 'rHDMA2',
    0xFF53: 'rHDMA3',
    0xFF54: 'rHDMA4',
    0xFF55: 'rHDMA5',
    0xFF56: 'rRP',
    0xFF68: 'rBCPS',
    0xFF69: 'rBCPD',
    0xFF6A: 'rOCPS',
    0xFF6B: 'rOCPD',
    0xFF70: 'rSVBK',
    0xFFFF: 'rIE',
    0xFF24: 'rNR50',
    0xFF25: 'rNR51',
    0xFF26: 'rNR52',
    0xFF10: 'rNR10',
    0xFF11: 'rNR11',
    0xFF12: 'rNR12',
    0xFF13: 'rNR13',
    0xFF14: 'rNR14',
    0xFF16: 'rNR21',
    0xFF17: 'rNR22',
    0xFF18: 'rNR23',
    0xFF19: 'rNR24',
    0xFF1A: 'rNR30',
    0xFF1B: 'rNR31',
    0xFF1C: 'rNR32',
    0xFF1D: 'rNR33',
    0xFF1E: 'rNR34',
    0xFF20: 'rNR41',
    0xFF21: 'rNR42',
    0xFF22: 'rNR43',
    0xFF23: 'rNR44',
    0xFF76: 'rPCM12',
    0xFF77: 'rPCM34',
}

# Renderers for the 8-bit operand of "ldh"-style instructions, keyed by the
# value of the --ldh_a8 command line option.  Each takes the operand byte and
# returns the bracketed operand text.
ldh_a8_formatters = {
    'ldh_a8': lambda value: '[{0}]'.format(hex_byte(value)),
    'ld_ff00_a8': lambda value: '[{0}+{1}]'.format(hex_word(0xff00), hex_byte(value)),
    'ldh_ffa8': lambda value: '[{0}]'.format(hex_word(0xff00 + value)),
}

def abort(message):
    """Print *message* to stdout and terminate the process with exit status 1.

    The process is ended with ``os._exit``, so no cleanup handlers run and
    this function never returns.
    """
    # os._exit() does not flush stdio buffers; without an explicit flush the
    # message (and any pending progress output) is lost whenever stdout is
    # block-buffered, e.g. redirected to a pipe or a file.
    print(message, flush=True)
    os._exit(1)


def hex_word(value):
    """Return *value* as a ``$``-prefixed hexadecimal string of at least four digits.

    The letter case of the result is decided by ``format_hex``.
    """
    return format_hex('${:04x}'.format(value))


def hex_byte(value):
    """Return *value* as a ``$``-prefixed hexadecimal string of at least two digits.

    The letter case of the result is decided by ``format_hex``.
    """
    return format_hex('${:02x}'.format(value))


def format_hex(hex_string):
    """Return *hex_string* in the letter case selected by the output style.

    Reads the module-level ``style`` dictionary: the string is upper-cased
    when ``style['uppercase_hex']`` is true and lower-cased otherwise.
    """
    return hex_string.upper() if style['uppercase_hex'] else hex_string.lower()

def bytes_to_string(data):
    """Return the bytes in *data* as space-separated ``hex_byte`` strings."""
    return ' '.join(hex_byte(byte) for byte in data)


def rom_address_to_mem_address(address):
    """Convert an offset within the ROM file to the address it is mapped at.

    Offsets below $4000 are returned unchanged; any other offset is folded
    into the $4000-$7FFF window.
    """
    if address < 0x4000:
        return address
    return (address % 0x4000) + 0x4000


def to_signed(value):
    """Interpret the unsigned byte *value* (0-255) as a two's complement number.

    Values above 127 map to the negative range, e.g. 255 -> -1, 128 -> -128.
    """
    return value - 256 if value > 127 else value

def apply_style_to_instructions(style, instructions):
    """Adapt the opcode table to the selected output style, in place.

    Args:
        style: style dictionary; ``style['db']`` is the data directive to use
            and ``style[name]`` selects the variant for every ``name`` in the
            module-level ``instruction_variants`` table.
        instructions: mapping of opcode to instruction text; it is modified
            in place.

    Returns:
        The same ``instructions`` mapping that was passed in.
    """
    # set undefined opcodes to use db/DB
    # (only existing keys are reassigned, so mutating while iterating is safe)
    for opcode, instruction in instructions.items():
        if instruction.startswith('db '):
            instructions[opcode] = style['db'] + ' ' + hex_byte(opcode)

    # set instruction variants
    for variant_name, variants in instruction_variants.items():
        instructions.update(variants[style[variant_name]])

    return instructions


class Bank:
    """Disassembly state and assembly text generation for one ROM bank.

    Bank 0 is addressed from $0000, every other bank from $4000; a bank spans
    $4000 bytes.  A bank is disassembled in two passes: the first pass
    resolves the block layout and records jump/call targets and the addresses
    at which instructions start, the second pass renders the assembly text
    using the labels collected during the first pass.

    The class relies on module-level names defined elsewhere in this file:
    ``debug``, ``instructions``, ``hardware_labels``, ``ldh_a8_formatters``,
    ``abort`` and the formatting helpers (``hex_byte``, ``hex_word``,
    ``format_hex``, ``bytes_to_string``, ``rom_address_to_mem_address``,
    ``to_signed``).
    """

    # Printable ASCII characters that are not safe inside a quoted RGBDS
    # string (quote, escape character, symbol interpolation braces).  They
    # are emitted as hex bytes instead so the output assembles to the same
    # bytes.
    _UNQUOTABLE_TEXT_BYTES = frozenset(b'"\\{}')

    def __init__(self, number, symbols, style):
        """Create the bank.

        Args:
            number: bank number (0 is the fixed bank).
            symbols: object providing ``get_blocks(bank)`` and
                ``get_label(bank, address)`` (a ``Symbols`` instance).
            style: output style dictionary.
        """
        self.style = style
        self.bank_number = number
        self.blocks = dict()
        self.disassembled_addresses = set()
        self.symbols = symbols

        # created up front so the output helpers can be used before
        # disassemble() has been called
        self.output = list()
        self.first_pass = False
        self.pc = 0

        if number == 0:
            self.memory_base_address = 0
            self.rom_base_address = 0
        else:
            self.memory_base_address = 0x4000
            # added to a memory address (>= $4000) to obtain the ROM offset
            self.rom_base_address = (number - 1) * 0x4000

        self.target_addresses = dict({
            'call': set(),
            'jp': set(),
            'jr': set()
        })

        self.instruction_label_prefixes = dict({
            'call': 'Call',
            'jp': 'Jump',
            'jr': 'jr'
        })

        self.disassemble_block_range = dict({
            'code': self.process_code_in_range,
            'data': self.process_data_in_range,
            'text': self.process_text_in_range,
            'image': self.process_image_in_range
        })

    def add_target_address(self, instruction_name, address):
        """Record *address* as the target of a 'call', 'jp' or 'jr' instruction."""
        self.target_addresses[instruction_name].add(address)

    def resolve_blocks(self):
        """Turn the declared blocks of this bank into a gap-free layout.

        Overlapping blocks are truncated where the next block starts, blocks
        are clipped at the end of the bank, and every gap (including the
        space after the last block) is filled with a code block.  Blocks that
        start outside the bank are ignored.  The result is stored in
        ``self.blocks``.
        """
        blocks = self.symbols.get_blocks(self.bank_number)
        bank_end_address = self.memory_base_address + 0x4000
        block_start_addresses = [
            address for address in sorted(blocks.keys())
            if self.memory_base_address <= address < bank_end_address
        ]
        resolved_blocks = dict()

        for index, start_address in enumerate(block_start_addresses):
            block = blocks[start_address]
            # a block can never extend past the end of its bank
            end_address = min(start_address + block['length'], bank_end_address)

            # check if there is another block after this block
            next_start_address = None
            if index < len(block_start_addresses) - 1:
                next_start_address = block_start_addresses[index + 1]

                # if the next block starts before this one finishes, then adjust end address
                if next_start_address < end_address:
                    end_address = next_start_address

            resolved_blocks[start_address] = {
                'type': block['type'],
                'length': end_address - start_address,
                'arguments': block['arguments'],
            }

            # fill the space up to the next block (or the end of the bank)
            # with a code block
            gap_end_address = bank_end_address if next_start_address is None else next_start_address
            if end_address < gap_end_address:
                resolved_blocks[end_address] = {
                    'type': 'code',
                    'length': gap_end_address - end_address,
                    'arguments': None
                }

        self.blocks = resolved_blocks

    def get_label_for_instruction_operand(self, value):
        """Return the symbol for a 16-bit operand *value*, or None."""
        # a small operand value (up to and including $100) is more probably
        # an actual value than an address: don't lookup symbols for it
        if value <= 0x100:
            return None

        return self.symbols.get_label(self.bank_number, value)

    def get_label_for_jump_target(self, instruction_name, address):
        """Return the label to use for a jump/call to *address*, or None.

        A label is only returned for addresses at which an instruction was
        disassembled in this bank (targets in bank 0 seen from another bank
        are not checked).  A user-defined symbol takes precedence over a
        generated label.
        """
        if self.bank_number == 0:
            if address not in self.disassembled_addresses:
                return None
        else:
            # TODO: if target address is in bank 0 then should check if that address
            # has been disassembled in bank 0. requires access to bank 0 from
            # other bank objects

            is_in_switchable_bank = 0x4000 <= address < 0x8000
            if is_in_switchable_bank and address not in self.disassembled_addresses:
                return None

        label = self.symbols.get_label(self.bank_number, address)
        if label is not None:
            # if the address has a specific label then just use that
            return label

        if address in self.target_addresses[instruction_name]:
            return self.format_label(instruction_name, address)

        return None

    @staticmethod
    def _format_label_definition(label):
        """Return *label* as a definition: ':' for local labels, '::' otherwise."""
        return label + (':' if label.startswith('.') else '::')

    def get_labels_for_non_code_address(self, address):
        """Return the label definition lines for a data/text/image address."""
        label = self.symbols.get_label(self.bank_number, address)
        if label is None:
            return list()
        return [self._format_label_definition(label)]

    def get_labels_for_address(self, address):
        """Return the label definition lines for a code address.

        A user-defined symbol is used when present; otherwise one generated
        label is returned for every instruction kind that targets the address.
        """
        label = self.symbols.get_label(self.bank_number, address)
        if label is not None:
            # if the address has a specific label then just use that
            return [self._format_label_definition(label)]

        # otherwise, if the address was marked as a target address, generate a label
        return [
            self.format_label(instruction_name, address) + ':'
            for instruction_name in ['call', 'jp', 'jr']
            if address in self.target_addresses[instruction_name]
        ]

    def format_label(self, instruction_name, address):
        """Return the generated label name, e.g. ``Call_001_4abc``."""
        formatted_bank = format_hex('{:03x}'.format(self.bank_number))
        formatted_address = format_hex('{:04x}'.format(address))
        return '{0}_{1}_{2}'.format(self.instruction_label_prefixes[instruction_name], formatted_bank, formatted_address)

    def format_image_label(self, address):
        """Return the default base name for an unlabelled image block."""
        return 'image_{0:03x}_{1:04x}'.format(self.bank_number, address)

    def format_instruction(self, instruction_name, operands, address = None, source_bytes = None):
        """Return one formatted line of assembly.

        When the 'print_hex' style is enabled and both *address* and
        *source_bytes* are given, the line is padded and followed by a
        comment holding the address and the raw bytes.
        """
        instruction = '{indentation}{instruction_name:<{operand_padding}} {operands}'.format(
            indentation=self.style['indentation'],
            instruction_name=instruction_name,
            operand_padding=self.style['operand_padding'],
            operands=', '.join(operands)
        )

        if self.style['print_hex'] and address is not None and source_bytes is not None:
            return '{0:<50}; {1}: {2}'.format(instruction, hex_word(address), bytes_to_string(source_bytes))

        # drop the separator space left behind when there are no operands
        return instruction.rstrip()

    def format_data(self, data):
        """Return a db/DB line for the already formatted values in *data*."""
        return self.format_instruction(self.style['db'], data)

    def append_output(self, text):
        """Append one entry (normally one line) to the output buffer."""
        self.output.append(text)

    def append_labels_to_output(self, labels):
        """Append label definitions, preceded by an empty line if needed."""
        self.append_empty_line_if_none_already()
        self.append_output('\n'.join(labels))

    def append_empty_line_if_none_already(self):
        """Append an empty line unless the output is empty or already ends with one."""
        if self.output and self.output[-1] != '':
            self.append_output('')

    def disassemble(self, rom, first_pass = False):
        """Disassemble the whole bank and return the generated text.

        Args:
            rom: the ROM being disassembled (provides ``data`` and the
                instruction tables).
            first_pass: when true, the block layout is resolved and label
                information is collected.
        """
        self.first_pass = first_pass

        if first_pass:
            self.resolve_blocks()

        self.output = list()

        if self.bank_number == 0:
            self.append_output('SECTION "ROM Bank ${0:03x}", ROM0[$0]'.format(self.bank_number))
        else:
            self.append_output('SECTION "ROM Bank ${0:03x}", ROMX[$4000], BANK[${0:x}]'.format(self.bank_number))
        self.append_output('')

        for start_address in sorted(self.blocks.keys()):
            block = self.blocks[start_address]
            end_address = start_address + block['length']
            process_range = self.disassemble_block_range[block['type']]
            process_range(rom, self.rom_base_address + start_address, self.rom_base_address + end_address, block['arguments'])
            self.append_empty_line_if_none_already()

        return '\n'.join(self.output)

    def process_code_in_range(self, rom, start_address, end_address, arguments = None):
        """Disassemble instructions from ROM offset *start_address* up to *end_address*."""
        if not self.first_pass and debug:
            print('Disassembling code in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

        self.pc = start_address
        while self.pc < end_address:
            self.disassemble_at_pc(rom, end_address)

    @staticmethod
    def _read_word(rom, address):
        """Return the little-endian 16-bit value stored at ROM offset *address*."""
        return rom.data[address] + rom.data[address + 1] * 256

    def disassemble_at_pc(self, rom, end_address):
        """Decode the instruction at ``self.pc`` and advance ``self.pc`` past it.

        On the first pass the start address of the instruction and any
        jump/call target inside this bank are recorded.  On the second pass
        the labels and the formatted instruction are appended to the output.
        An instruction that would extend past *end_address* is emitted as a
        single data byte.
        """
        pc = self.pc
        pc_mem_address = rom_address_to_mem_address(pc)
        length = 1
        opcode = rom.data[pc]
        operand_values = list()

        if opcode not in instructions:
            abort('Unhandled opcode: {} at {}'.format(hex_byte(opcode), hex_word(pc)))

        if opcode == 0xCB:
            cb_opcode = rom.data[pc + 1]
            length += 1

            instruction_name = rom.cb_instruction_name[cb_opcode]
            operands = rom.cb_instruction_operands[cb_opcode]
        else:
            instruction_name = rom.instruction_names[opcode]
            operands = rom.instruction_operands[opcode]

        if instruction_name == 'stop' or (instruction_name == 'halt' and not self.style['disable_halt_nops']):
            if rom.data[pc + 1] == 0x00:
                # rgbds adds a nop instruction after a stop/halt, so if that instruction
                # exists then we can insert it as a stop/halt command with length 2
                length += 1
            else:
                # otherwise handle it as a data byte
                instruction_name = self.style['db']
                operands = [hex_byte(opcode)]

        # numeric value of the operand processed last; this is what the
        # jump/call label handling after the loop looks at
        value = None

        # figure out the operand values for each operand
        for operand in operands:
            value = None

            if operand == 'a16':
                length += 2
                value = self._read_word(rom, pc + 1)
                operand_values.append(hex_word(value))

            elif operand == '[a16]':
                length += 2
                value = self._read_word(rom, pc + 1)
                label = self.get_label_for_instruction_operand(value)
                if label:
                    operand_values.append('[' + label + ']')
                else:
                    operand_values.append('[' + hex_word(value) + ']')

                # rgbds converts "ld [$ff40],a" into "ld [$ff00+40],a" automatically,
                # so use a macro to encode it as data to ensure exact binary reproduction of the rom
                if not self.style['disable_auto_ldh']:
                    if value >= 0xff00 and (opcode == 0xea or opcode == 0xfa):
                        rom.has_ld_long = True

                        # use ld_long macro
                        instruction_name = 'ld_long'

                        # cannot wrap the address value with square brackets
                        operand_values.pop()
                        operand_values.append(hex_word(value))

            elif operand in ('[$ff00+a8]', '[a8]', '[$ffa8]'):
                length += 1
                value = rom.data[pc + 1]
                full_value = 0xff00 + value
                label = self.get_label_for_instruction_operand(full_value)
                if label is not None:
                    # when referencing a label, we need to explicitly tell rgbds to use the short load opcode
                    instruction_name = 'ldh'
                    operand_values.append('[{}]'.format(label))
                elif full_value in hardware_labels:
                    operand_values.append('[{}]'.format(hardware_labels[full_value]))
                else:
                    # use one of the ldh_a8_formatters formatters
                    operand_values.append(ldh_a8_formatters[self.style['ldh_a8']](value))

            elif operand == 'd8':
                length += 1
                value = rom.data[pc + 1]
                operand_values.append(hex_byte(value))

            elif operand == 'd16':
                length += 2
                value = self._read_word(rom, pc + 1)
                label = self.get_label_for_instruction_operand(value)
                if label is not None:
                    operand_values.append(label)
                else:
                    operand_values.append(hex_word(value))

            elif operand == 'r8':
                length += 1
                value = to_signed(rom.data[pc + 1])
                if value < 0:
                    operand_values.append('-' + hex_byte(abs(value)))
                else:
                    operand_values.append(hex_byte(value))

            elif operand == 'pc+r8':
                length += 1
                value = to_signed(rom.data[pc + 1])

                # calculate the absolute address for the jump
                value = pc + 2 + value

                relative_value = value - pc
                if relative_value >= 0:
                    operand_values.append('@+' + hex_byte(relative_value))
                else:
                    operand_values.append('@-' + hex_byte(relative_value * -1))

                target_bank = value // 0x4000

                # convert to banked value so it can be used as a label
                value = rom_address_to_mem_address(value)

                if self.bank_number != target_bank:
                    # don't use labels for relative jumps across banks
                    value = None

                    if target_bank < self.bank_number:
                        # output as data, otherwise RGBDS will complain
                        instruction_name = self.style['db']
                        operand_values = [hex_byte(opcode), hex_byte(rom.data[pc + 1])]

                        # exit the loop to avoid processing the operands any further
                        break

            elif operand == 'sp+r8':
                length += 1
                value = to_signed(rom.data[pc + 1])

                if value < 0:
                    operand_values.append('sp-' + hex_byte(abs(value)))
                else:
                    operand_values.append('sp+' + hex_byte(value))

            elif operand == '[$ff00+c]':
                operand_values.append('[{0}+c]'.format(hex_word(0xff00)))

            elif isinstance(operand, str):
                # registers, conditions and other literal operands
                operand_values.append(operand)

            else:
                operand_values.append(hex_byte(operand))

        if instruction_name in ['jr', 'jp', 'call'] and value is not None and value < 0x8000:
            mem_address = rom_address_to_mem_address(value)

            if self.first_pass:
                # dont allow switched banks to create labels in bank 0
                is_address_in_current_bank = (mem_address < 0x4000 and self.bank_number == 0) or (mem_address >= 0x4000 and self.bank_number > 0)
                if is_address_in_current_bank:
                    # add the label
                    self.add_target_address(instruction_name, mem_address)
            else:
                # fetch the label name
                label = self.get_label_for_jump_target(instruction_name, mem_address)
                if label is not None:
                    # remove the address from operand values and use the label instead
                    operand_values.pop()
                    operand_values.append(label)

        # check the instruction is not spanning 2 banks
        if pc + length - 1 >= end_address:
            # must handle it as data
            length = 1
            instruction_name = self.style['db']
            operand_values = [hex_byte(opcode)]

        self.pc += length

        if self.first_pass:
            self.disassembled_addresses.add(pc_mem_address)
            return

        labels = self.get_labels_for_address(pc_mem_address)
        if labels:
            self.append_labels_to_output(labels)

        instruction_bytes = rom.data[pc:pc + length]
        self.append_output(self.format_instruction(instruction_name, operand_values, pc_mem_address, instruction_bytes))

        # add some empty lines after returns and jumps to break up the code blocks
        if instruction_name in ['ret', 'reti', 'jr', 'jp']:
            is_conditional_or_jr = (
                instruction_name == 'jr' or
                (instruction_name == 'jp' and len(operand_values) > 1) or
                (instruction_name == 'ret' and len(operand_values) > 0)
            )
            self.append_output('')
            if not is_conditional_or_jr:
                # always executes: separate more clearly from what follows
                self.append_output('')

    def process_data_in_range(self, rom, start_address, end_address, arguments = None):
        """Output the bytes in the given ROM offset range as db lines (max 16 bytes per line)."""
        if not self.first_pass and debug:
            print('Outputting data in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

        values = list()

        for address in range(start_address, end_address):
            mem_address = rom_address_to_mem_address(address)

            labels = self.get_labels_for_non_code_address(mem_address)
            if labels:
                # add any existing values to the output and reset the list
                if values:
                    self.append_output(self.format_data(values))
                    values = list()

                self.append_labels_to_output(labels)

            values.append(hex_byte(rom.data[address]))

            # output max of 16 bytes per line, and ensure any remaining values are output
            if len(values) == 16 or address == end_address - 1:
                self.append_output(self.format_data(values))
                values = list()

    def process_text_in_range(self, rom, start_address, end_address, arguments = None):
        """Output the bytes in the given ROM offset range as db lines of text.

        Runs of printable ASCII characters are emitted as quoted strings; all
        other bytes, and the characters that are unsafe inside a quoted
        string, are emitted as hex values.
        """
        if not self.first_pass and debug:
            print('Outputting text in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

        values = list()
        text = ''

        for address in range(start_address, end_address):
            mem_address = rom_address_to_mem_address(address)

            labels = self.get_labels_for_non_code_address(mem_address)
            if labels:
                # add any existing values to the output and reset the list
                if text:
                    values.append('"{}"'.format(text))
                    text = ''

                if values:
                    self.append_output(self.format_data(values))
                    values = list()

                self.append_labels_to_output(labels)

            byte = rom.data[address]
            if 0x20 <= byte < 0x7F and byte not in self._UNQUOTABLE_TEXT_BYTES:
                text += chr(byte)
            else:
                if text:
                    values.append('"{}"'.format(text))
                    text = ''
                values.append(hex_byte(byte))

        if text:
            values.append('"{}"'.format(text))

        if values:
            self.append_output(self.format_data(values))

    def process_image_in_range(self, rom, start_address, end_address, arguments = None):
        """Write the given ROM offset range as an image file and output an INCBIN for it.

        Nothing is done on the first pass.  The image file is named after the
        label at the start address, or after ``format_image_label`` when
        there is none.
        """
        if not self.first_pass and debug:
            print('Outputting image in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

        if self.first_pass:
            return

        mem_address = rom_address_to_mem_address(start_address)
        labels = self.get_labels_for_non_code_address(mem_address)
        if labels:
            self.append_labels_to_output(labels)
            basename = labels[0].rstrip(':')
        else:
            basename = self.format_image_label(mem_address)

        full_filename = rom.write_image(basename, arguments, rom.data[start_address:end_address])
        self.append_output(self.format_instruction('INCBIN', ['\"' + full_filename + '\"']))




class Symbols:
    """Labels and block definitions, grouped by ROM bank.

    Labels for addresses in $4000-$7FFF are stored per bank; labels for all
    other addresses are stored under bank 0 and are visible from every bank.
    """

    # block type prefixes accepted in symbol definitions -> internal block type
    _BLOCK_TYPES = {
        '.byt': 'data',
        '.data': 'data',
        '.asc': 'text',
        '.text': 'text',
        '.code': 'code',
        '.image': 'image',
    }

    def __init__(self):
        self.symbols = dict()
        self.blocks = dict()

    def load_sym_file(self, symbols_path):
        """Load every symbol definition from the file at *symbols_path*.

        Empty lines and lines whose first non-blank character is ';' are
        skipped.
        """
        with open(symbols_path, 'r') as f:
            for line in f:
                stripped = line.strip()
                # ignore comments and empty lines
                if stripped and not stripped.startswith(';'):
                    self.add_symbol_definition(line)

    def add_symbol_definition(self, symbol_def):
        """Parse one "bank:address name" definition and store it.

        Bank and address are hexadecimal.  A name of the form
        ".type:length[:arguments]" (length in hexadecimal) defines a block;
        block definitions with an unknown type are silently skipped.  Any
        other name is stored as a label.  Malformed definitions are reported
        on stdout and ignored.
        """
        try:
            location, label = symbol_def.split()
            bank, address = location.split(':')
            bank = int(bank, 16)
            address = int(address, 16)
        except ValueError:
            # wrong number of fields or non-hexadecimal bank/address
            print("Ignored invalid symbol definition: {}\n".format(symbol_def))
            return

        label_parts = label.split(':')
        is_block_definition = label[0] == '.' and len(label_parts) >= 2

        if not is_block_definition:
            # add the label
            self.add_label(bank, address, label)
            return

        block_type = self._BLOCK_TYPES.get(label_parts[0].lower())
        if block_type is None:
            return

        try:
            data_length = int(label_parts[1], 16)
        except ValueError:
            print("Ignored invalid symbol definition: {}\n".format(symbol_def))
            return

        arguments = label_parts[2] if len(label_parts) == 3 else None

        # add a block
        self.add_block(bank, address, block_type, data_length, arguments)

    def add_block(self, bank, address, block_type, length, arguments = None):
        """Store a block definition; addresses below the bank's base address are ignored."""
        memory_base_address = 0x0000 if bank == 0 else 0x4000

        if address >= memory_base_address:
            blocks = self.get_blocks(bank)
            blocks[address] = {
                'type': block_type,
                'length': length,
                'arguments': arguments
            }

    def add_label(self, bank, address, label):
        """Store *label* for *address*, replacing any label already stored there."""
        is_symbol_banked = 0x4000 <= address < 0x8000
        # non-banked symbols are shared by all banks and stored as bank 0
        target_bank = bank if is_symbol_banked else 0
        self.symbols.setdefault(target_bank, dict())[address] = label

    def get_label(self, bank, address):
        """Return the label for *address* as seen from *bank*, or None."""
        # attempt to find a banked symbol
        is_symbol_banked = 0x4000 <= address < 0x8000
        if is_symbol_banked and bank in self.symbols and address in self.symbols[bank]:
            return self.symbols[bank][address]

        # attempt to find a symbol in non-banked space (stored as bank 0)
        if 0 in self.symbols and address in self.symbols[0]:
            return self.symbols[0][address]

        return None

    def get_blocks(self, bank):
        """Return the (mutable) mapping of start address to block for *bank*.

        The mapping is created on first use with a single code block that
        covers the whole bank.
        """
        memory_base_address = 0x0000 if bank == 0 else 0x4000

        if bank not in self.blocks:
            self.blocks[bank] = dict()
            # each bank defaults to having a single code block
            self.add_block(bank, memory_base_address, 'code', 0x4000)

        return self.blocks[bank]

class ROM:
    """A Game Boy ROM image and the generation of its disassembly project.

    Loading the ROM splits it into banks of $4000 bytes, loads the default
    symbols plus an optional ``.sym`` file next to the ROM, and prepares the
    instruction tables used by ``Bank``.  ``disassemble`` then writes one
    ``bank_XXX.asm`` per bank, ``game.asm``, ``hardware.inc``, a ``Makefile``
    and any image files into the output directory.

    The class relies on module-level names defined elsewhere in this file:
    ``args``, ``debug``, ``app_name``, ``instructions``, ``cb_instructions``,
    ``default_symbols``, ``gbc_symbols`` and ``abort``.
    """

    # macro written to game.asm when a "ld" with a 16-bit $FFxx address has
    # to be encoded explicitly (see Bank.disassemble_at_pc)
    _LD_LONG_MACRO = (
        'ld_long: MACRO\n'
        '    IF STRLWR("\\1") == "a"\n'
        '        ; ld a, [$ff40]\n'
        '        db $FA\n'
        '        dw \\2\n'
        '    ELSE\n'
        '        IF STRLWR("\\2") == "a"\n'
        '            ; ld [$ff40], a\n'
        '            db $EA\n'
        '            dw \\1\n'
        '        ENDC\n'
        '    ENDC\n'
        'ENDM\n'
        '\n'
    )

    def __init__(self, rom_path, style):
        """Load the ROM at *rom_path* and create a ``Bank`` for each bank in it."""
        self.style = style
        self.script_dir = os.path.dirname(os.path.realpath(__file__))
        self.rom_path = rom_path
        self.load()
        self.split_instructions()
        self.has_ld_long = False

        self.image_output_directory = 'gfx'
        self.image_dependencies = []

        print('ROM MD5 hash:', hashlib.md5(self.data).hexdigest())

        self.symbols = self.load_symbols()

        # add some bytes to avoid an index out of range error
        # when processing last few instructions in the rom
        self.data += b'\x00\x00'

        self.banks = dict()
        for bank in range(0, self.num_banks):
            self.banks[bank] = Bank(bank, self.symbols, style)

    def load(self):
        """Read the ROM file into ``self.data``; abort if it does not exist."""
        if os.path.isfile(self.rom_path):
            print('Loading "{}"...'.format(self.rom_path))
            with open(self.rom_path, 'rb') as f:
                self.data = f.read()
            self.rom_size = len(self.data)
            self.num_banks = self.rom_size // 0x4000
        else:
            abort('"{}" not found'.format(self.rom_path))

    @staticmethod
    def _split_instruction_table(table):
        """Split "name operand,operand" strings into name and operand-list mappings."""
        names = dict()
        operands = dict()
        for opcode in table:
            instruction_parts = table[opcode].split()
            names[opcode] = instruction_parts[0]
            if len(instruction_parts) > 1:
                operands[opcode] = instruction_parts[1].split(',')
            else:
                operands[opcode] = list()
        return names, operands

    def split_instructions(self):
        """Build the per-opcode name and operand tables from the instruction sets."""
        # split the instructions and operands
        self.instruction_names, self.instruction_operands = self._split_instruction_table(instructions)
        self.cb_instruction_name, self.cb_instruction_operands = self._split_instruction_table(cb_instructions)

    def load_symbols(self):
        """Return a ``Symbols`` object holding the default and user symbols.

        User symbols are read from the file with the ROM's name and a
        ``.sym`` extension, when that file exists.
        """
        symbols = Symbols()

        for symbol_def in default_symbols:
            symbols.add_symbol_definition(symbol_def)

        if self.supports_gbc():
            for symbol_def in gbc_symbols:
                symbols.add_symbol_definition(symbol_def)

        symbols_path = os.path.splitext(self.rom_path)[0] + '.sym'
        if os.path.isfile(symbols_path):
            print('Processing symbol file "{}"...'.format(symbols_path))
            symbols.load_sym_file(symbols_path)

        return symbols

    def supports_gbc(self):
        """Return True when bit 7 of the header byte at $0143 is set.

        A ROM too short to contain that byte is reported as not supporting
        Game Boy Color.
        """
        return len(self.data) > 0x143 and (self.data[0x143] & 0x80) == 0x80

    def disassemble(self, output_dir):
        """Generate the complete disassembly project in *output_dir*.

        Aborts when the directory already exists and ``--overwrite`` was not
        given, or when the path exists but is not a directory.
        """
        self.output_directory = os.path.abspath(output_dir.rstrip(os.sep))

        if os.path.exists(self.output_directory):
            if not args.overwrite:
                abort('Output directory "{}" already exists!'.format(self.output_directory))

            if not os.path.isdir(self.output_directory):
                abort('Output path "{}" already exists and is not a directory!'.format(self.output_directory))
        else:
            os.makedirs(self.output_directory)

        print('Generating labels...')
        self.generate_labels()

        self.image_dependencies = []

        print('Generating disassembly', end='')
        if debug:
            print('')

        for bank in range(0, self.num_banks):
            self.write_bank_asm(bank)

        self.copy_hardware_inc()
        self.write_game_asm()
        self.write_makefile()

        print('\nDisassembly generated in "{}"'.format(self.output_directory))

    def generate_labels(self):
        """Run the first disassembly pass over every bank."""
        for bank in range(0, self.num_banks):
            # pass this instance rather than relying on a module-level variable
            self.banks[bank].disassemble(self, True)

    def write_bank_asm(self, bank):
        """Run the second pass for *bank* and write its ``bank_XXX.asm`` file."""
        if not debug:
            # progress indicator
            print('.', end='', flush=True)

        path = os.path.join(self.output_directory, 'bank_{0:03x}.asm'.format(bank))
        with open(path, 'w') as f:
            self.write_header(f)
            f.write(self.banks[bank].disassemble(self))

    def write_header(self, f):
        """Write the generated-file comment header to the open file *f*."""
        f.write('; Disassembly of "{}"\n'.format(os.path.basename(self.rom_path)))
        f.write('; This file was created with:\n')
        f.write('; {}\n'.format(app_name))
        f.write('; https://github.com/mattcurrie/mgbdis\n\n')

    def copy_hardware_inc(self):
        """Copy ``hardware.inc`` from the script directory to the output directory."""
        src = os.path.join(self.script_dir, 'hardware.inc')
        dest = os.path.join(self.output_directory, 'hardware.inc')
        copyfile(src, dest)

    def write_game_asm(self):
        """Write ``game.asm``, which includes hardware.inc and every bank file.

        The ``ld_long`` macro is emitted first when any bank used it.
        """
        path = os.path.join(self.output_directory, 'game.asm')
        with open(path, 'w') as f:
            self.write_header(f)

            if self.has_ld_long:
                f.write(self._LD_LONG_MACRO)

            f.write('INCLUDE "hardware.inc"')
            for bank in range(0, self.num_banks):
                f.write('\nINCLUDE "bank_{0:03x}.asm"'.format(bank))

    def write_image(self, basename, arguments, data):
        """Write tile *data* as ``gfx/<basename>.png`` in the output directory.

        Args:
            basename: file name without extension.
            arguments: optional comma separated options: ``wN`` (width in
                pixels, decimal), ``pXX`` (palette, hexadecimal) and ``1bpp``.
                Defaults are width 128, palette $e4 and 2 bits per pixel.
            data: the raw tile bytes.

        Returns:
            The path, relative to the output directory, of the ``.1bpp`` /
            ``.2bpp`` file that the Makefile builds from the PNG.  The same
            path is recorded in ``self.image_dependencies``.

        Aborts when the data does not form a whole number of tile rows.
        """
        # defaults
        width = 128
        palette = 0xe4
        bpp = 2

        # process arguments
        if arguments is not None:
            for argument in arguments.split(','):
                if len(argument) > 1:
                    if argument[0] == 'w':
                        # width is in decimal
                        width = int(argument[1:], 10)

                    elif argument[0] == 'p':
                        palette = int(argument[1:], 16)

                    elif argument == '1bpp':
                        bpp = 1

        image_output_path = os.path.join(self.output_directory, self.image_output_directory)
        if os.path.exists(image_output_path):
            if not os.path.isdir(image_output_path):
                abort('File already exists named "{}". Cannot store images!'.format(image_output_path))
        else:
            os.makedirs(image_output_path)

        relative_path = os.path.join(self.image_output_directory, basename + '.' + "{}bpp".format(bpp))
        self.image_dependencies.append(relative_path)
        path = os.path.join(self.output_directory, self.image_output_directory, basename + '.png')

        bytes_per_tile_row = bpp  # 8 pixels at 1 or 2 bits per pixel
        bytes_per_tile = bytes_per_tile_row * 8  # 8 rows per tile

        num_tiles = len(data) // bytes_per_tile
        tiles_per_row = width // 8

        # if we have fewer tiles than the number of tiles per row, or if an odd number of tiles
        if (num_tiles < tiles_per_row) or (num_tiles & 1):
            # then just make a single row of tiles
            tiles_per_row = num_tiles
            width = num_tiles * 8

        # tiles_per_row is zero when the block holds no complete tile or the
        # requested width is below 8 pixels
        if tiles_per_row == 0 or num_tiles % tiles_per_row != 0:
            abort('Invalid length ${:0x} or width {} for image block: {}'.format(len(data), width, basename))

        height = (num_tiles // tiles_per_row) * 8

        pixel_data = self.convert_to_pixel_data(data, width, height, bpp)
        rgb_palette = self.convert_palette_to_rgb(palette, bpp)

        with open(path, 'wb') as f:
            w = png.Writer(width, height, alpha=False, bitdepth=2, palette=rgb_palette)
            w.write(f, pixel_data)

        return relative_path

    def convert_to_pixel_data(self, data, width, height, bpp):
        """Return *height* rows of *width* colour indexes decoded from tile *data*.

        Pixels whose tile row lies beyond the end of *data* get colour 0.
        """
        result = []
        for y in range(0, height):
            row = []
            for x in range(0, width):
                offset = self.coordinate_to_tile_offset(x, y, width, bpp)

                # the tile row occupies bpp bytes starting at offset
                if offset + bpp <= len(data):
                    # extract the color from the one or two bytes of tile data at the offset
                    shift = (7 - (x & 7))
                    color = (data[offset] >> shift) & 1
                    if bpp == 2:
                        color += ((data[offset + 1] >> shift) & 1) << 1
                else:
                    color = 0

                row.append(color)
            result.append(row)

        return result

    def coordinate_to_tile_offset(self, x, y, width, bpp):
        """Return the offset in the tile data of the row containing pixel (x, y)."""
        bytes_per_tile_row = bpp  # 8 pixels at 1 or 2 bits per pixel
        bytes_per_tile = bytes_per_tile_row * 8  # 8 rows per tile
        tiles_per_row = width // 8

        tile_y = y // 8
        tile_x = x // 8
        row_of_tile = y & 7

        return (tile_y * tiles_per_row * bytes_per_tile) + (tile_x * bytes_per_tile) + (row_of_tile * bytes_per_tile_row)

    def convert_palette_to_rgb(self, palette, bpp):
        """Return the grey-scale RGB tuples for a palette byte.

        Each 2-bit field of *palette* selects a shade (0 is white).  Four
        colours are returned for 2bpp; for 1bpp only colours 0 and 3.
        """
        shades = [255 - (((palette >> shift) & 0x03) << 6) for shift in (0, 2, 4, 6)]
        if bpp != 2:
            shades = [shades[0], shades[3]]
        return [(shade, shade, shade) for shade in shades]

    def write_makefile(self):
        """Write the Makefile that rebuilds the ROM from the generated files."""
        rom_extension = 'gb'
        if self.supports_gbc():
            rom_extension = 'gbc'

        path = os.path.join(self.output_directory, 'Makefile')
        with open(path, 'w') as f:
            if len(self.image_dependencies):
                f.write('IMAGE_DEPS = {}\n\n'.format(' '.join(self.image_dependencies)))

            f.write('all: game.{}\n\n'.format(rom_extension))

            f.write('%.2bpp: %.png\n')
            f.write('\trgbgfx -o $@ $<\n\n')

            f.write('%.1bpp: %.png\n')
            f.write('\trgbgfx -d 1 -o $@ $<\n\n')

            if len(self.image_dependencies):
                f.write('game.o: game.asm bank_*.asm $(IMAGE_DEPS)\n')
            else:
                f.write('game.o: game.asm bank_*.asm\n')

            parameters = []
            if self.style['disable_halt_nops']:
                parameters.append('-h')
            if self.style['disable_auto_ldh']:
                parameters.append('-L')
            f.write('\trgbasm {} -o game.o game.asm\n\n'.format(' '.join(parameters)))

            f.write('game.{}: game.o\n'.format(rom_extension))
            f.write('\trgblink -n game.sym -m game.map -o $@ $<\n')
            f.write('\trgbfix -v -p 255 $@\n\n')
            f.write('\tmd5 $@\n\n')

            f.write('clean:\n')
            f.write('\trm -f game.o game.{} game.sym game.map\n'.format(rom_extension))
            f.write('\tfind . \\( -iname \'*.1bpp\' -o -iname \'*.2bpp\' \\) -exec rm {} +')



# Script body: parse the command line, build the output style shared by all
# classes and helpers above (they read the module-level names ``args``,
# ``debug``, ``style``, ``instructions`` and ``app_name``), then disassemble.
app_name = 'mgbdis v{version} - Game Boy ROM disassembler by {author}.'.format(version=__version__, author=__author__)
parser = argparse.ArgumentParser(description=app_name)
parser.add_argument('rom_path', help='Game Boy (Color) ROM file to disassemble')
parser.add_argument('--output-dir', default='disassembly', help='Directory to write the files into. Defaults to "disassembly"', action='store')
parser.add_argument('--uppercase-hex', help='Print hexadecimal numbers using uppercase characters', action='store_true')
parser.add_argument('--print-hex', help='Print the hexadecimal representation next to the opcodes', action='store_true')
parser.add_argument('--align-operands', help='Format the instruction operands to align them vertically', action='store_true')
parser.add_argument('--indent-spaces', help='Number of spaces to use to indent instructions', type=int, default=4)
parser.add_argument('--indent-tabs', help='Use tabs for indenting instructions', action='store_true')
parser.add_argument('--uppercase-db', help='Use uppercase for DB data declarations', action='store_true')
parser.add_argument('--hli', help='Mnemonic to use for \'ld [hl+], a\' type instructions.', type=str, default='hl+', choices=['hl+', 'hli', 'ldi'])
parser.add_argument('--ldh_a8', help='Mnemonic to use for \'ldh [a8], a\' type instructions.', type=str, default='ldh_a8', choices=['ldh_a8', 'ldh_ffa8', 'ld_ff00_a8'])
parser.add_argument('--ld_c', help='Mnemonic to use for \'ld [c], a\' type instructions.', type=str, default='ld_c', choices=['ld_c', 'ldh_c', 'ld_ff00_c'])
parser.add_argument('--disable-halt-nops', help='Disable RGBDS\'s automatic insertion of \'nop\' instructions after \'halt\' instructions.', action='store_true')
parser.add_argument('--disable-auto-ldh', help='Disable RGBDS\'s automatic optimisation of \'ld [$ff00+a8], a\' to \'ldh [a8], a\' instructions. Requires RGBDS >= v0.3.7', action='store_true')
parser.add_argument('--overwrite', help='Allow generating a disassembly into an already existing directory', action='store_true')
parser.add_argument('--debug', help='Display debug output', action='store_true')
args = parser.parse_args()

debug = args.debug

style = {
    'uppercase_hex': args.uppercase_hex,
    'print_hex': args.print_hex,
    # tabs take precedence over --indent-spaces
    'indentation': '\t' if args.indent_tabs else ' ' * args.indent_spaces,
    'operand_padding': 4 if args.align_operands else 0,
    'db': 'DB' if args.uppercase_db else 'db',
    'hli': args.hli,
    'ldh_a8': args.ldh_a8,
    'ld_c': args.ld_c,
    'disable_halt_nops': args.disable_halt_nops,
    'disable_auto_ldh': args.disable_auto_ldh,
}
# must run before ROM() is created: ROM.split_instructions reads the styled table
instructions = apply_style_to_instructions(style, instructions)

rom = ROM(args.rom_path, style)
rom.disassemble(args.output_dir)