diff options
author | luckytyphlosion <alan.rj.huang@gmail.com> | 2016-05-28 19:07:00 -0400 |
---|---|---|
committer | luckytyphlosion <alan.rj.huang@gmail.com> | 2016-05-28 19:07:00 -0400 |
commit | 6d983352cb21708b871d403d4a095342ecd55a4a (patch) | |
tree | 5a2af50d5cc6b873eac6c366475ae2260ca7ea73 | |
parent | 39fc0a33cec805a69e2c5343aef19669481b870c (diff) |
Comment gbz80disasm.py
-rw-r--r-- | .gitattributes | 11 | ||||
-rw-r--r-- | .gitignore | 16 | ||||
-rw-r--r-- | pokemontools/gbz80disasm.py | 132 |
3 files changed, 129 insertions, 30 deletions
diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..88b9f7a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,11 @@ +# Auto detect text files and perform LF normalization +* text eol=lf + +# Explicitly declare text files you want to always be normalized and converted +# to native line endings on checkout. +*.py text +*.txt text +*.MD text +*.c text +*.data text +*.in text
\ No newline at end of file @@ -14,9 +14,25 @@ *.gbc *.gb +# asm files +*.asm + +# symfile +*.sym + # generated *.tx +# output for some scripts +*.1bpp +*.2bpp +*.png +*.bst +*.map +*.wav +*.blk +*.pic + # swap files for vim .*.swp diff --git a/pokemontools/gbz80disasm.py b/pokemontools/gbz80disasm.py index 1c68763..b115b45 100644 --- a/pokemontools/gbz80disasm.py +++ b/pokemontools/gbz80disasm.py @@ -584,8 +584,7 @@ class Disassembler(object): byte_labels = {} data_tables = {} - - first_loop = True + output = "Func_%x:%s\n" % (start_offset,create_address_comment(start_offset)) is_data = False @@ -602,6 +601,7 @@ class Disassembler(object): byte_label_created = local_offset in byte_labels.keys() if byte_label_created: + # if a byte label exists, remove any significance if there is a data label that exists if data_label_created: data_line_label = data_tables[local_offset]["name"] data_tables[local_offset]["usage"] = 0 @@ -615,6 +615,7 @@ class Disassembler(object): byte_labels[local_offset]["usage"] += 1 output += "\n" elif data_label_created and parse_data: + # go add usage to a data label if it exists data_line_label = data_tables[local_offset]["name"] data_tables[local_offset]["usage"] += 1 @@ -624,6 +625,7 @@ class Disassembler(object): byte_labels[local_offset]["usage"] = 0 output += "\n" else: + # create both a data and byte label if neither exist data_line_label = data_label(offset) data_tables[local_offset] = {} data_tables[local_offset]["name"] = data_line_label @@ -634,82 +636,116 @@ class Disassembler(object): byte_labels[local_offset]["name"] = line_label byte_labels[local_offset]["usage"] = 0 + # any labels created not above are now used, so mark them as "defined" byte_labels[local_offset]["definition"] = True - if local_offset in data_tables: - data_tables[local_offset]["definition"] = True + data_tables[local_offset]["definition"] = True + # for now, output the byte and data labels (unused labels will be removed later output += line_label + "\n" + data_line_label + "\n" + # get the current byte opcode_byte = rom[offset] + # process the current byte if this is code or parse data has not been set if not is_data or not parse_data: + # fetch the opcode string from a predefined table opcode_str = z80_table[opcode_byte][0] + # fetch the number of arguments opcode_nargs = z80_table[opcode_byte][1] + # get opcode arguments in advance (may not be used) opcode_arg_1 = rom[offset+1] opcode_arg_2 = rom[offset+2] if opcode_nargs == 0: + # set output string simply as the opcode opcode_output_str = opcode_str elif opcode_nargs == 1: + # opcodes with 1 argument if opcode_byte != 0xcb: # bit opcodes are handled separately - opcode_output_str = "" - - if opcode_byte in relative_jumps: #jr or jr nz - #generate a label for the byte we're jumping to + if opcode_byte in relative_jumps: + # if the current opcode is a relative jump, generate a label for the address we're jumping to + # get the address of the location to jump to target_address = offset + 2 + c_int8(opcode_arg_1).value + # get the local address to use as a key for byte_labels and data_tables local_target_address = get_local_address(target_address) + if local_target_address in byte_labels.keys(): + # if the label has already been created, increase the usage and set output to the already created label byte_labels[local_target_address]["usage"] += 1 opcode_output_str = byte_labels[local_target_address]["name"] elif target_address < start_offset: + # if we're jumping to an address that is located before the start offset, assume it is a function opcode_output_str = "Func_%x" % target_address else: + # create a new label opcode_output_str = asm_label(target_address) byte_labels[local_target_address] = {} byte_labels[local_target_address]["name"] = opcode_output_str + # we know the label is used once, so set the usage to 1 byte_labels[local_target_address]["usage"] = 1 + # since the label has not been output yet, mark it as "not defined" byte_labels[local_target_address]["definition"] = False - + + # check if the target address conflicts with any data labels if local_target_address in data_tables.keys(): + # if so, remove any instances of it being used and set it as defined data_tables[local_target_address]["usage"] = 0 data_tables[local_target_address]["definition"] = True + # format the resulting argument into the output string opcode_output_str = opcode_str.format(opcode_output_str) + # debug function if created_but_unused_labels_exist(byte_labels) and debug: output += create_address_comment(offset) - #if current_byte in relative_jumps: - # output += " $" + hex(rom[offset + 1])[2:] + elif opcode_byte == 0xe0 or opcode_byte == 0xf0: + # handle gameboy hram read/write opcodes + # create the address high_ram_address = 0xff00 + opcode_arg_1 + # search for an hram constant if possible high_ram_label = self.find_label(high_ram_address, bank_id) + # if we couldn't find one, default to the address if high_ram_label is None: high_ram_label = "$%x" % high_ram_address - + + # format the resulting argument into the output string opcode_output_str = opcode_str.format(high_ram_label) + else: + # if this isn't a relative jump or hram read/write, just format the byte into the opcode string opcode_output_str = opcode_str.format(opcode_arg_1) - + else: + # handle bit opcodes by fetching the opcode from a separate table opcode_output_str = bit_ops_table[opcode_arg_1] - + elif opcode_nargs == 2: + # opcodes with a pointer as an argument + # format the two arguments into a little endian 16-bit pointer local_target_offset = opcode_arg_2 << 8 | opcode_arg_1 + # get the global offset of the pointer target_offset = get_global_address(local_target_offset, bank_id) + # attempt to look for a matching label target_label = self.find_label(target_offset, bank_id) - + if opcode_byte in call_commands + absolute_jumps: if target_label is None: + # if this is a call or jump opcode and the target label is not defined, create an undocumented label descriptor target_label = "Func_%x" % target_offset else: + # anything that isn't a call or jump is a load-based command if target_label is None: - if local_target_offset >= 0x8000 or not parse_data: - target_label = "$%x" % local_target_offset - elif offset_is_used(byte_labels, local_target_offset): + # handle the case of a label for the current address not existing + + # first, check if this is a byte label + if offset_is_used(byte_labels, local_target_offset): + # fetch the already created byte label target_label = byte_labels[local_target_offset]["name"] + # prevent this address from being treated as a data label if local_target_offset in data_tables.keys(): data_tables[local_target_offset]["usage"] = 0 else: @@ -717,76 +753,104 @@ class Disassembler(object): data_tables[local_target_offset]["name"] = target_label data_tables[local_target_offset]["usage"] = 0 data_tables[local_target_offset]["definition"] = True + + elif local_target_offset >= 0x8000 or not parse_data: + # do not create a label if this is a wram label or parse_data is not set + target_label = "$%x" % local_target_offset + elif local_target_offset in data_tables.keys(): + # if the target offset has been created as a data label, increase usage and use the already defined name data_tables[local_target_offset]["usage"] += 1 target_label = data_tables[local_target_offset]["name"] else: + # for now, treat this as a data label, but do not set it as used (will be replaced later if unused) target_label = data_label(target_offset) data_tables[local_target_offset] = {} data_tables[local_target_offset]["name"] = target_label data_tables[local_target_offset]["usage"] = 0 data_tables[local_target_offset]["definition"] = False - + # format the label that was created into the opcode string opcode_output_str = opcode_str.format(target_label) else: + # error checking raise ValueError("Invalid amount of args.") + # append the formatted opcode output string to the output output += self.spacing + opcode_output_str + "\n" #+ " ; " + hex(offset) + # increase the current byte number and offset by the amount of arguments plus 1 (opcode itself) current_byte_number += opcode_nargs + 1 offset += opcode_nargs + 1 else: + # output a single lined db, using the current byte output += self.spacing + "db ${:02x}\n".format(opcode_byte) #+ " ; " + hex(offset) + # manually increment offset and current byte number offset += 1 current_byte_number += 1 + # stop treating the current code as data if we're parsing over a byte label if get_local_address(offset) in byte_labels.keys(): is_data = False + # update the local offset local_offset = get_local_address(offset) + # stop processing regardless of function end if we've passed the stop offset and the hard stop (dry run) flag is set if hard_stop and offset >= stop_offset: break + # check if this is the end of the function, or we're processing data elif (opcode_byte in unconditional_jumps + unconditional_returns) or is_data: - # define data if we're right at it + # define data if it is located at the current offset if local_offset not in byte_labels.keys() and local_offset in data_tables.keys() and created_but_unused_labels_exist(data_tables) and parse_data: is_data = True #stop reading at a jump, relative jump or return elif all_byte_labels_are_defined(byte_labels) and (offset >= stop_offset or stop_offset_undefined): break + # otherwise, add some spacing output += "\n" - - first_loop = False - - #clean up unused labels + # before returning output, we need to clean up some things + + # first, clean up on unused byte labels for label_line in byte_labels.values(): if label_line["usage"] == 0: output = output.replace((label_line["name"] + "\n"), "") + # clean up on unused data labels + # this is slightly trickier to do as arguments for two byte variables use data labels + + # create a list of the output lines including the newlines output_lines = [e+"\n" for e in output.split("\n") if e != ""] + # go through each label for label_addr in data_tables.keys(): + # get the label dict label_line = data_tables[label_addr] + # check if this label is unused if label_line["usage"] == 0: + # get label name label_name = label_line["name"] + # loop over all output lines for i, line in enumerate(output_lines): if line.startswith(label_name): + # remove line if it starts with the current label output_lines.pop(i) elif label_name in line: + # if the label is used in a load-based opcode, replace it with the raw hex reference output_lines[i] = output_lines[i].replace(label_name, "$%x" % get_local_address(label_addr)) + # convert the modified list of lines into a string output = "".join(output_lines) - #tone down excessive spacing + # tone down excessive spacing output = output.replace("\n\n\n","\n\n") - #add the offset of the final location + # add the offset of the final location if include_last_address: output += "; " + hex(offset) - return [output, offset, stop_offset, byte_labels] + return [output, offset, stop_offset, byte_labels, data_labels] def get_raw_addr(addr): if addr: @@ -803,6 +867,7 @@ def get_raw_addr(addr): return addr if __name__ == "__main__": + # argument parser ap = argparse.ArgumentParser() ap.add_argument("-r", dest="rom", default="baserom.gbc") ap.add_argument("-o", dest="filename", default="gbz80disasm_output.asm") @@ -816,25 +881,32 @@ if __name__ == "__main__": ap.add_argument('end', nargs='?') args = ap.parse_args() + # if symfile is unspecified, use the rom name as the symfile name if args.symfile is None: args.symfile = args.rom.split(".")[0] + ".sym" - + + # flag to determine whether to use the extras submodule path for labels/constants or the disassembly path if args.use_disasm_path: conf = configuration.Config(path=os.path.abspath(os.path.join(os.getcwd(),"../.."))) else: conf = configuration.Config() - + + # initialize disassembler disasm = Disassembler(conf) disasm.initialize(args.rom, args.symfile) + # get global address of the start and stop offsets start_addr = get_raw_addr(args.offset) - stop_addr = get_raw_addr(args.end) + + # run the disassembler and return the output output = disasm.output_bank_opcodes(start_addr,stop_addr,hard_stop=args.dry_run,parse_data=args.parse_data)[0] + # suppress output if quiet flag is set if not args.quiet: print output + # only write to the output file if the no write flag is unset if not args.no_write: with open(args.filename, "w") as f: f.write(output)
\ No newline at end of file |