diff options
author | Bryan Bishop <kanzure@gmail.com> | 2012-06-10 12:24:33 -0500 |
---|---|---|
committer | Bryan Bishop <kanzure@gmail.com> | 2012-06-10 12:24:33 -0500 |
commit | 71a06d372cdf2072236c3ce0cef3db50fc1e4231 (patch) | |
tree | 5172c1a0568b8da59f60abe56cd984e6949ecc76 /romstr.py | |
parent | 5727b4e9ac09a41ccfdb5d4df1b3b10cd07e3160 (diff) |
somewhat improved disassembler
original-commit-id: 10a5a6518d709c92df04ce5938d5fedf2c6df685
Diffstat (limited to 'romstr.py')
-rw-r--r-- | romstr.py | 255 |
1 files changed, 93 insertions, 162 deletions
@@ -147,48 +147,55 @@ class Asm: # [{"command": 0x20, "bytes": [0x20, 0x40, 0x50], # "asm": "jp $5040", "label": "Unknown5040"}] - asm_commands = [] + asm_commands = {} offset = start_address - current_byte_number = 0 - last_hl_address = None last_a_address = None used_3d97 = False keep_reading = True - # for labeling future bytes (like for relative jumps) - byte_labels = {} - while offset <= end_address and keep_reading: + # read the current opcode byte current_byte = ord(rom[offset]) - - is_data = False + current_byte_number = len(asm_commands.keys()) - maybe_byte = current_byte - - # check if this byte has a label prior to it - # and if not, generate a new label - # This new label might not be used, so it will be - # removed if the total usage is zero. - if offset in byte_labels.keys(): - line_label = byte_labels[offset]["name"] - byte_labels[offset]["usage"] += 1 - else: - line_label = asm_label(offset) - byte_labels[offset] = {} - byte_labels[offset]["name"] = line_label - byte_labels[offset]["usage"] = 0 - byte_labels[offset]["definition"] = True - - #find out if there's a two byte key like this - temp_maybe = maybe_byte - temp_maybe += ( ord(rom[offset+1]) << 8) - if temp_maybe in opt_table.keys() and ord(rom[offset+1])!=0: - opstr = opt_table[temp_maybe][0].lower() - + # setup this next/upcoming command + asm_command = { + "address": offset, + + # This counts how many times relative jumps reference this + # byte. This is used to determine whether or not to print out a + # label later. + "references": 0, + } + + # some commands have two opcodes + next_byte = ord(rom[offset+1]) + + # all two-byte opcodes also have their first byte in there somewhere + if current_byte in opt_table.keys(): + # this might be a two-byte opcode + possible_opcode = current_byte + (next_byte << 8) + + # check if this is a two-byte opcode + if possible_opcode in opt_table.keys(): + op_code = possible_opcode + else: + op_code = current_byte + + op = opt_table[op_code] + + opstr = op[0].lower() + optype = op[1] + + asm_command["type"] = "op" + asm_command["id"] = op_code + asm_command["format"] = opstr + asm_command["opnumberthing"] = optype + if "x" in opstr: for x in range(0, opstr.count("x")): insertion = ord(rom[offset + 1]) @@ -196,8 +203,8 @@ class Asm: opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - current_byte += 1 - offset += 1 + current_byte_number += 1 + offset += 1 if "?" in opstr: for y in range(0, opstr.count("?")): @@ -213,145 +220,69 @@ class Asm: current_byte_number += 2 offset += 2 - - asm_commands.append({"address": offset, "command": opstr}) - output += spacing + opstr #+ " ; " + hex(offset) - output += "\n" - - current_byte_number += 2 - offset += 2 - elif maybe_byte in opt_table.keys(): - op_code = opt_table[maybe_byte] - op_code_type = op_code[1] - op_code_byte = maybe_byte - - #type = -1 when it's the E op - #if op_code_type != -1: - if op_code_type == 0 and ord(rom[offset]) == op_code_byte: - op_str = op_code[0].lower() - - output += spacing + op_code[0].lower() #+ " ; " + hex(offset) - output += "\n" - - offset += 1 - current_byte_number += 1 - elif op_code_type == 1 and ord(rom[offset]) == op_code_byte: - oplen = len(op_code[0]) - opstr = copy(op_code[0]) - xes = op_code[0].count("x") - include_comment = False - for x in range(0, xes): - insertion = ord(rom[offset + 1]) - insertion = "$" + hex(insertion)[2:] - - if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz - #generate a label for the byte we're jumping to - target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value - if target_address in byte_labels.keys(): - byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] - line_label2 = byte_labels[target_address]["name"] - else: - line_label2 = asm_label(target_address) - byte_labels[target_address] = {} - byte_labels[target_address]["name"] = line_label2 - byte_labels[target_address]["usage"] = 1 - byte_labels[target_address]["definition"] = False - - insertion = line_label2.lower() - include_comment = True - elif current_byte == 0x3e: - last_a_address = ord(rom[offset + 1]) - - opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() - output += spacing + opstr - if include_comment: - output += " ; " + hex(offset) - if current_byte in relative_jumps: - output += " $" + hex(ord(rom[offset + 1]))[2:] - output += "\n" - - current_byte_number += 1 - offset += 1 - insertion = "" - - current_byte_number += 1 - offset += 1 - include_comment = False - - elif op_code_type == 2 and ord(rom[offset]) == op_code_byte: - oplen = len(op_code[0]) - opstr = copy(op_code[0]) - qes = op_code[0].count("?") - for x in range(0, qes): - byte1 = ord(rom[offset + 1]) - byte2 = ord(rom[offset + 2]) - - number = byte1 - number += byte2 << 8; - - insertion = "$%.4x" % (number) - if maybe_byte in call_commands or current_byte in relative_unconditional_jumps or current_byte in relative_jumps: - result = find_label(insertion, bank_id) - if result != None: - insertion = result - - opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower() - output += spacing + opstr #+ " ; " + hex(offset) - output += "\n" - - current_byte_number += 2 - offset += 2 - - current_byte_number += 1 - offset += 1 - - if current_byte == 0x21: - last_hl_address = byte1 + (byte2 << 8) - if current_byte == 0xcd: - if number == 0x3d97: used_3d97 = True - #duck out if this is jp $24d7 - if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: - if current_byte == 0xc3: - if number == 0x3d97: used_3d97 = True - #if number == 0x24d7: #jp - if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): - keep_reading = False - is_data = False - break - else: - is_data = True - - #stop reading at a jump, relative jump or return + + # Check for relative jumps, construct the formatted asm line. + # Also set the usage of labels. + if current_byte in [0x18, 0x20] or current_byte in relative_jumps: # jr or jr nz + # generate a label for the byte we're jumping to + target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value + + if target_address in byte_labels.keys(): + byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] + line_label2 = byte_labels[target_address]["name"] + + else: + line_label2 = asm_label(target_address) + byte_labels[target_address] = {} + byte_labels[target_address]["name"] = line_label2 + byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["definition"] = False + + insertion = line_label2.lower() + include_comment = True + elif current_byte == 0x3e: + last_a_address = ord(rom[offset + 1]) + + # store the formatted string for the output later + asm_command["formatted"] = opstr + + if current_byte == 0x21: + last_hl_address = byte1 + (byte2 << 8) + + # this is leftover from pokered, might be meaningless + if current_byte == 0xcd: + if number == 0x3d97: + used_3d97 = True + + if current_byte == 0xc3 or current_byte in relative_unconditional_jumps: + if current_byte == 0xc3: + if number == 0x3d97: + used_3d97 = True + + if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset): + keep_reading = False + break + + # stop reading at a jump, relative jump or return if current_byte in end_08_scripts_with: + is_data = False + if not has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset): keep_reading = False - is_data = False #cleanup break else: - is_data = False keep_reading = True else: - is_data = False keep_reading = True + else: - #if is_data and keep_reading: - output += spacing + "db $" + hex(ord(rom[offset]))[2:] #+ " ; " + hex(offset) - output += "\n" - offset += 1 - current_byte_number += 1 - #else the while loop would have spit out the opcode - - #these two are done prior - #offset += 1 - #current_byte_number += 1 - - # clean up unused labels.. used to be in 'output', but is now in asm_commands - for label_line in byte_labels.keys(): - address = label_line - label_line = byte_labels[label_line] - if label_line["usage"] == 0: - output = output.replace((label_line["name"] + "\n").lower(), "") - raise NotImplementedError + # This shouldn't really happen, and means that this area of the + # ROM probably doesn't represent instructions. + asm_command["type"] = "data" # db + asm_command["value"] = current_byte + + # save this new command in the list + asm_commands[current_byte_number] = asm_command def __str__(self): """ ASM pretty printer. |