From 140718cbb2a99495ef465b4528958ed780f96904 Mon Sep 17 00:00:00 2001 From: hondew Date: Sat, 20 Mar 2021 18:24:59 -0400 Subject: Fix bug preventing relocations from getting updated and inserted --- tools/asm_processor/asm_processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/asm_processor/asm_processor.py') diff --git a/tools/asm_processor/asm_processor.py b/tools/asm_processor/asm_processor.py index e1540c0c..5156ec11 100644 --- a/tools/asm_processor/asm_processor.py +++ b/tools/asm_processor/asm_processor.py @@ -1153,8 +1153,7 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): for reltab in target.relocated_by: nrels = [] for rel in reltab.relocations: - if (sectype == '.text' and rel.r_offset in modified_text_positions or - sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions) or sectype == ".sbss2": + if (sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions) or sectype == ".sbss2": # don't include relocations for late_rodata dummy code continue # hopefully we don't have relocations for local or -- cgit v1.2.3 From 08b17cc7dd272a0f6fbbb88a507c7bd02e1d1d63 Mon Sep 17 00:00:00 2001 From: hondew Date: Sat, 20 Mar 2021 23:00:24 -0400 Subject: Make func name in symtab point to correct text section --- tools/asm_processor/asm_processor.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'tools/asm_processor/asm_processor.py') diff --git a/tools/asm_processor/asm_processor.py b/tools/asm_processor/asm_processor.py index 5156ec11..669e3cdd 100644 --- a/tools/asm_processor/asm_processor.py +++ b/tools/asm_processor/asm_processor.py @@ -903,6 +903,16 @@ def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None) out_file.close() return asm_functions +# Return the function name in objfile corresponding to function +# `asm_func_name` in asm_objfile. `to_copy` is the dictionary of the +# same name in fix_objfile(). +def convert_func_name(asm_func_name, to_copy): + for sec_name, func_data in to_copy.items(): + print(sec_name, func_data) + if func_data and func_data[0][4] == asm_func_name: + return func_data[0][2] + return '' + def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): SECTIONS = ['.data'] SECTIONS.extend(['.text' for i in range(0,len(functions))]) @@ -947,7 +957,7 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): asm.append('nop') else: asm.append('.space {}'.format(loc - prev_loc)) - to_copy[sectype + (str(n_text) if sectype == '.text' else '')].append((loc, size, temp_name, function.fn_desc)) + to_copy[sectype + (str(n_text) if sectype == '.text' else '')].append((loc, size, temp_name, function.fn_desc, function.text_glabels[0])) prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] = loc + size if not ifdefed: all_text_glabels.update(function.text_glabels) @@ -1020,7 +1030,7 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): asm_n_text = asm_objfile.text_section_index(func + '_asm_start') source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) assert source is not None, "didn't find source section: " + sectype - for (pos, count, temp_name, fn_desc) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: + for (pos, count, temp_name, fn_desc, fn_name) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: loc1 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_start', source) loc2 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_end', source) assert loc1 == pos, "assembly and C files don't line up for section " + sectype + ", " + fn_desc @@ -1034,7 +1044,7 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) assert target is not None, "missing target section of type " + sectype data = list(target.data) - for (pos, count, _, _) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: + for (pos, count, _, _, _) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: # mwasmarm 4-aligns text sections, so make sure to copy exactly `count` bytes data[pos:pos + count] = source.data[pos:pos + count] if sectype == '.text': @@ -1122,7 +1132,9 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): raise Failure("generated assembly .o must only have symbols for .text, .data, .rodata, .sdata, .sdata2, .sbss, ABS and UNDEF, but found " + section_name) if section_name == '.sbss2': #! I'm not sure why this isn't working continue - s.st_shndx = objfile.find_section(section_name, n_text if section_name == '.text' else 0).index + obj_func_name = convert_func_name(s.name, to_copy) + obj_n_text = objfile.text_section_index(obj_func_name) + s.st_shndx = objfile.find_section(section_name, obj_n_text if section_name == '.text' else 0).index if section_name == '.text': n_text += 1 # glabel's aren't marked as functions, making objdump output confusing. Fix that. -- cgit v1.2.3 From 1a9c2488da81dbed1c4e5fc1910e0a41b5fa90f5 Mon Sep 17 00:00:00 2001 From: hondew Date: Sun, 21 Mar 2021 00:36:12 -0400 Subject: Don't forget asm_processor... --- tools/asm_processor/asm_processor.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/asm_processor/asm_processor.py (limited to 'tools/asm_processor/asm_processor.py') diff --git a/tools/asm_processor/asm_processor.py b/tools/asm_processor/asm_processor.py old mode 100644 new mode 100755 -- cgit v1.2.3 From a3f1dea5a8c95830cd767c6385c871b53a2d02f5 Mon Sep 17 00:00:00 2001 From: hondew Date: Sun, 21 Mar 2021 00:49:22 -0400 Subject: Unix format for asm_processor.py --- tools/asm_processor/asm_processor.py | 2544 +++++++++++++++++----------------- 1 file changed, 1272 insertions(+), 1272 deletions(-) (limited to 'tools/asm_processor/asm_processor.py') diff --git a/tools/asm_processor/asm_processor.py b/tools/asm_processor/asm_processor.py index 669e3cdd..c3579b04 100755 --- a/tools/asm_processor/asm_processor.py +++ b/tools/asm_processor/asm_processor.py @@ -1,1272 +1,1272 @@ -#!/usr/bin/env python3 -import argparse -import tempfile -import struct -import copy -import sys -import re -import os -from collections import namedtuple, defaultdict -from io import StringIO - -MAX_FN_SIZE = 100 -SLOW_CHECKS = False - -EI_NIDENT = 16 -EI_CLASS = 4 -EI_DATA = 5 -EI_VERSION = 6 -EI_OSABI = 7 -EI_ABIVERSION = 8 -STN_UNDEF = 0 - -SHN_UNDEF = 0 -SHN_ABS = 0xfff1 -SHN_COMMON = 0xfff2 -SHN_XINDEX = 0xffff -SHN_LORESERVE = 0xff00 - -STT_NOTYPE = 0 -STT_OBJECT = 1 -STT_FUNC = 2 -STT_SECTION = 3 -STT_FILE = 4 -STT_COMMON = 5 -STT_TLS = 6 - -STB_LOCAL = 0 -STB_GLOBAL = 1 -STB_WEAK = 2 - -STV_DEFAULT = 0 -STV_INTERNAL = 1 -STV_HIDDEN = 2 -STV_PROTECTED = 3 - -SHT_NULL = 0 -SHT_PROGBITS = 1 -SHT_SYMTAB = 2 -SHT_STRTAB = 3 -SHT_RELA = 4 -SHT_HASH = 5 -SHT_DYNAMIC = 6 -SHT_NOTE = 7 -SHT_NOBITS = 8 -SHT_REL = 9 -SHT_SHLIB = 10 -SHT_DYNSYM = 11 -SHT_INIT_ARRAY = 14 -SHT_FINI_ARRAY = 15 -SHT_PREINIT_ARRAY = 16 -SHT_GROUP = 17 -SHT_SYMTAB_SHNDX = 18 -SHT_MIPS_GPTAB = 0x70000003 -SHT_MIPS_DEBUG = 0x70000005 -SHT_MIPS_REGINFO = 0x70000006 -SHT_MIPS_OPTIONS = 0x7000000d - -SHF_WRITE = 0x1 -SHF_ALLOC = 0x2 -SHF_EXECINSTR = 0x4 -SHF_MERGE = 0x10 -SHF_STRINGS = 0x20 -SHF_INFO_LINK = 0x40 -SHF_LINK_ORDER = 0x80 -SHF_OS_NONCONFORMING = 0x100 -SHF_GROUP = 0x200 -SHF_TLS = 0x400 - -R_MIPS_32 = 2 -R_MIPS_26 = 4 -R_MIPS_HI16 = 5 -R_MIPS_LO16 = 6 - - -class ElfHeader: - """ - typedef struct { - unsigned char e_ident[EI_NIDENT]; - Elf32_Half e_type; - Elf32_Half e_machine; - Elf32_Word e_version; - Elf32_Addr e_entry; - Elf32_Off e_phoff; - Elf32_Off e_shoff; - Elf32_Word e_flags; - Elf32_Half e_ehsize; - Elf32_Half e_phentsize; - Elf32_Half e_phnum; - Elf32_Half e_shentsize; - Elf32_Half e_shnum; - Elf32_Half e_shstrndx; - } Elf32_Ehdr; - """ - - def __init__(self, data): - self.e_ident = data[:EI_NIDENT] - self.e_type, self.e_machine, self.e_version, self.e_entry, self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize, self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack('> 4 - self.type = st_info & 15 - self.name = strtab.lookup_str(self.st_name) - self.visibility = self.st_other & 3 - - def to_bin(self): - st_info = (self.bind << 4) | self.type - return struct.pack('> 8 - self.rel_type = self.r_info & 0xff - - def to_bin(self): - self.r_info = (self.sym_index << 8) | self.rel_type - if self.sh_type == SHT_REL: - return struct.pack(' 0: - # Generate late rodata by emitting unique float constants. - # This requires 3 instructions for each 4 bytes of rodata. - # If we know alignment, we can use doubles, which give 3 - # instructions for 8 bytes of rodata. - size = self.fn_section_sizes['.late_rodata'] // 2 - skip_next = False - needs_double = (self.late_rodata_alignment != 0) - for i in range(size): - if skip_next: - skip_next = False - continue - # Jump tables give 9 instructions for >= 5 words of rodata, and should be - # emitted when: - # - -O2 or -O2 -g3 are used, which give the right codegen - # - we have emitted our first .float/.double (to ensure that we find the - # created rodata in the binary) - # - we have emitted our first .double, if any (to ensure alignment of doubles - # in shifted rodata sections) - # - we have at least 5 words of rodata left to emit (otherwise IDO does not - # generate a jump table) - # - we have at least 10 more instructions to go in this function (otherwise our - # function size computation will be wrong since the delay slot goes unused) - if (not needs_double and state.use_jtbl_for_rodata and i >= 1 and - size - i >= 5 and num_instr - len(late_rodata_fn_output) >= 10): - cases = " ".join("case {}:".format(case) for case in range(size - i)) - late_rodata_fn_output.append("switch (*(volatile int*)0) { " + cases + " ; }") - late_rodata_fn_output.extend([""] * 8) - jtbl_rodata_size = (size - i) * 4 - break - dummy_bytes = state.next_late_rodata_hex() - late_rodata_dummy_bytes.append(dummy_bytes) - if self.late_rodata_alignment == 4 * ((i + 1) % 2 + 1) and i + 1 < size: - dummy_bytes2 = state.next_late_rodata_hex() - late_rodata_dummy_bytes.append(dummy_bytes2) - fval, = struct.unpack(' 0 or late_rodata_fn_output: - text_name = state.make_name('func') - src[0] = 'int {}(void) {{ return '.format(text_name) - instr_count = self.fn_section_sizes['.text'] // 2 - src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count > 1 else '; }; ' - if instr_count < state.min_instr_count: - self.fail("too short .text block") - tot_emitted = 0 - tot_skipped = 0 - fn_emitted = 0 - fn_skipped = 0 - rodata_stack = late_rodata_fn_output[::-1] - for (line, count) in self.fn_ins_inds: - for _ in range(count): - if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and - (not rodata_stack or rodata_stack[-1])): - # Don't let functions become too large. When a function reaches 284 - # instructions, and -O2 -framepointer flags are passed, the IRIX - # compiler decides it is a great idea to start optimizing more. - fn_emitted = 0 - fn_skipped = 0 - src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func')) - if fn_skipped < state.skip_instr_count: - fn_skipped += 1 - tot_skipped += 1 - elif rodata_stack: - src[line] += rodata_stack.pop() - else: - src[line] += '*(int *)' - tot_emitted += 1 - fn_emitted += 1 - if rodata_stack: - size = len(late_rodata_fn_output) // 3 - available = instr_count - tot_skipped - self.fail( - "late rodata to text ratio is too high: {} / {} must be <= 1/3\n" - "add .late_rodata_alignment (4|8) to the .late_rodata " - "block to double the allowed ratio." - .format(size, available)) - - init_name = None - if self.fn_section_sizes['.init'] > 0 or late_rodata_fn_output: - init_name = state.make_name('func') - src[0] = 'int {}(void) {{ return '.format(init_name) - instr_count = self.fn_section_sizes['.init'] // 2 - src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count else '; }; ' - if instr_count < state.min_instr_count: - self.fail("too short .init block") - tot_emitted = 0 - tot_skipped = 0 - fn_emitted = 0 - fn_skipped = 0 - rodata_stack = late_rodata_fn_output[::-1] - for (line, count) in self.fn_ins_inds: - for _ in range(count): - if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and - (not rodata_stack or rodata_stack[-1])): - # Don't let functions become too large. When a function reaches 284 - # instructions, and -O2 -framepointer flags are passed, the IRIX - # compiler decides it is a great idea to start optimizing more. - fn_emitted = 0 - fn_skipped = 0 - src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func')) - if fn_skipped < state.skip_instr_count: - fn_skipped += 1 - tot_skipped += 1 - elif rodata_stack: - src[line] += rodata_stack.pop() - else: - src[line] += '*(int *)' - tot_emitted += 1 - fn_emitted += 1 - if rodata_stack: - size = len(late_rodata_fn_output) // 3 - available = instr_count - tot_skipped - self.fail( - "late rodata to init ratio is too high: {} / {} must be <= 1/3\n" - "add .late_rodata_alignment (4|8) to the .late_rodata " - "block to double the allowed ratio." - .format(size, available)) - - rodata_name = None - if self.fn_section_sizes['.rodata'] > 0: - rodata_name = state.make_name('rodata') - src[self.num_lines] += f" const char {rodata_name}[{self.fn_section_sizes['.rodata']}] = {{1}};" - - data_name = None - if self.fn_section_sizes['.data'] > 0: - data_name = state.make_name('data') - src[self.num_lines] += f" char {data_name}[{self.fn_section_sizes['.data']}] = {{1}};" - - bss_name = None - if self.fn_section_sizes['.bss'] > 0: - bss_name = state.make_name('bss') - src[self.num_lines] += f" char {bss_name}[{self.fn_section_sizes['.bss']}];" - - sdata_name = None # sdata is like data but small - if self.fn_section_sizes['.sdata'] > 0: - sdata_code = "" - for i in range(self.fn_section_sizes['.sdata']): - sdata_name = state.make_name('sdata') - sdata_code += f" char {sdata_name} = 1;" - src[self.num_lines] += sdata_code - - sdata2_name = None # sdata2 is like rodata but small - if self.fn_section_sizes['.sdata2'] > 0: - sdata2_code = "" - for i in range(self.fn_section_sizes['.sdata2']): - sdata2_name = state.make_name('sdata2') - sdata2_code += f" const char {sdata2_name} = 1;" - src[self.num_lines] += sdata2_code - - sbss_name = None # Similarly, sbss is like uninitialized data but small - if self.fn_section_sizes['.sbss'] > 0: - sbss_code = "" - for i in range(self.fn_section_sizes['.sbss']): - sbss_name = state.make_name('sbss') - sbss_code += f" char {sbss_name};" - src[self.num_lines] += sbss_code - - """ sbss2 is currently borked - sbss2_name = None # Similarly, sbss2 is like uninitialized rodata but small - if self.fn_section_sizes['.sbss2'] > 0: - sbss2_code = "" - for i in range(self.fn_section_sizes['.sbss2']): - sbss2_name = state.make_name('sbss2') - sbss2_code += f" const char {sbss2_name};" - src[self.num_lines] += sbss2_code - """ - - fn = Function( - text_glabels=self.text_glabels, - asm_conts=self.asm_conts, - late_rodata_dummy_bytes=late_rodata_dummy_bytes, - jtbl_rodata_size=jtbl_rodata_size, - late_rodata_asm_conts=self.late_rodata_asm_conts, - fn_desc=self.fn_desc, - data={ - '.text': (text_name, self.fn_section_sizes['.text']), - '.data': (data_name, self.fn_section_sizes['.data']), - '.rodata': (rodata_name, self.fn_section_sizes['.rodata']), - '.bss': (bss_name, self.fn_section_sizes['.bss']), - '.sdata': (sdata_name, self.fn_section_sizes['.sdata']), - '.sdata2': (sdata2_name, self.fn_section_sizes['.sdata2']), - '.sbss': (sbss_name, self.fn_section_sizes['.sbss']), - #'.sbss2': (sbss2_name, self.fn_section_sizes['.sbss2']), - }) - return src, fn - -cutscene_data_regexpr = re.compile(r"CutsceneData (.|\n)*\[\] = {") -float_regexpr = re.compile(r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f") - -def repl_float_hex(m): - return str(struct.unpack(" 0 - n_text = objfile.text_section_index(temp_name) - loc = objfile.symtab.find_symbol(temp_name) - if loc is None: - ifdefed = True - break - loc = loc[1] - prev_loc = prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] - if loc < prev_loc: - raise Failure("Wrongly computed size for section {} (diff {}). This is an asm-processor bug!".format(sectype + (str(n_text) if sectype == '.text' else ''), prev_loc- loc)) - if loc != prev_loc: - asm.append('.section ' + sectype) - if sectype == '.text': - for i in range((loc - prev_loc) // 2): - asm.append('nop') - else: - asm.append('.space {}'.format(loc - prev_loc)) - to_copy[sectype + (str(n_text) if sectype == '.text' else '')].append((loc, size, temp_name, function.fn_desc, function.text_glabels[0])) - prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] = loc + size - if not ifdefed: - all_text_glabels.update(function.text_glabels) - all_late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes) - all_jtbl_rodata_size.append(function.jtbl_rodata_size) - late_rodata_asm.append(function.late_rodata_asm_conts) - for sectype, (temp_name, size) in function.data.items(): - if temp_name is not None: - asm.append('.section ' + sectype) - asm.append('glabel ' + temp_name + '_asm_start') - asm.append('.section .text') - for line in function.asm_conts: - asm.append(line) - for sectype, (temp_name, size) in function.data.items(): - if temp_name is not None: - #asm.append('.section ' + sectype) - asm.append('glabel ' + temp_name + '_asm_end') - - if any(late_rodata_asm): - late_rodata_source_name_start = '_asmpp_late_rodata_start' - late_rodata_source_name_end = '_asmpp_late_rodata_end' - asm.append('.rdata') - asm.append('glabel {}'.format(late_rodata_source_name_start)) - for conts in late_rodata_asm: - asm.extend(conts) - asm.append('glabel {}'.format(late_rodata_source_name_end)) - - o_file = open("asm_processor_temp.o", 'w').close() # Create temp file. tempfile module isn't working for me. - o_name = "asm_processor_temp.o" - - s_file = open("asm_processor_temp.s", 'wb') # Ditto. - s_name = "asm_processor_temp.s" - try: - s_file.write(asm_prelude + b'\n') - for line in asm: - s_file.write(line.encode(output_enc) + b'\n') - s_file.close() - ret = os.system(assembler + " " + s_name + " -o " + o_name) - if ret != 0: - raise Failure("failed to assemble") - with open(o_name, 'rb') as f: - asm_objfile = ElfFile(f.read()) - - # Remove some clutter from objdump output - objfile.drop_irrelevant_sections() - - """ - # Unify reginfo sections - target_reginfo = objfile.find_section('.reginfo') - source_reginfo_data = list(asm_objfile.find_section('.reginfo').data) - data = list(target_reginfo.data) - for i in range(20): - data[i] |= source_reginfo_data[i] - target_reginfo.data = bytes(data) - """ - - # Move over section contents - modified_text_positions = set() - jtbl_rodata_positions = set() - last_rodata_pos = 0 - n_text = 0 - for sec in objfile.sections: - sectype = sec.name - if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: - if sectype == '.text': - n_text += 1 - continue - # This should work as long as you NONMATCH whole functions rather than asm fragments - func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2] - asm_n_text = asm_objfile.text_section_index(func + '_asm_start') - source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) - assert source is not None, "didn't find source section: " + sectype - for (pos, count, temp_name, fn_desc, fn_name) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: - loc1 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_start', source) - loc2 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_end', source) - assert loc1 == pos, "assembly and C files don't line up for section " + sectype + ", " + fn_desc - # Since we are nonmatching whole functions, we don't need to insert the correct - # amount of padding into the src file. We don't actually need to insert padding - # at all. We can just plop the asm's text section into the objfile. - # if loc2 - loc1 != count: - # raise Failure("incorrectly computed size for section " + sectype + ", " + fn_desc + ". If using .double, make sure to provide explicit alignment padding.") - if sectype == '.bss' or sectype == '.sbss2': - continue - target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) - assert target is not None, "missing target section of type " + sectype - data = list(target.data) - for (pos, count, _, _, _) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: - # mwasmarm 4-aligns text sections, so make sure to copy exactly `count` bytes - data[pos:pos + count] = source.data[pos:pos + count] - if sectype == '.text': - assert count % 2 == 0 - assert pos % 2 == 0 - for i in range(count // 2): - modified_text_positions.add(pos + 2 * i) - elif sectype == '.rodata': - last_rodata_pos = pos + count - target.data = bytes(data) - if sectype == '.text': - n_text += 1 - - # Move over late rodata. This is heuristic, sadly, since I can't think - # of another way of doing it. - moved_late_rodata = {} - if any(all_late_rodata_dummy_bytes) or any(all_jtbl_rodata_size): - source = asm_objfile.find_section('.rodata', 0) - target = objfile.find_section('.rodata', 0) - source_pos = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_start, source) - source_end = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_end, source) - if source_end - source_pos != sum(map(len, all_late_rodata_dummy_bytes)) * 2 + sum(all_jtbl_rodata_size): - raise Failure("computed wrong size of .late_rodata") - new_data = list(target.data) - for dummy_bytes_list, jtbl_rodata_size in zip(all_late_rodata_dummy_bytes, all_jtbl_rodata_size): - for index, dummy_bytes in enumerate(dummy_bytes_list): - pos = target.data.index(dummy_bytes, last_rodata_pos) - # This check is nice, but makes time complexity worse for large files: - if SLOW_CHECKS and target.data.find(dummy_bytes, pos + 2) != -1: - raise Failure("multiple occurrences of late_rodata hex magic. Change asm-processor to use something better than 0xE0123456!") - if index == 0 and len(dummy_bytes_list) > 1 and target.data[pos+2:pos+8] == b'\0\0\0\0': - # Ugly hack to handle double alignment for non-matching builds. - # We were told by .late_rodata_alignment (or deduced from a .double) - # that a function's late_rodata started out 4 (mod 8), and emitted - # a float and then a double. But it was actually 0 (mod 8), so our - # double was moved by 4 bytes. To make them adjacent to keep jump - # tables correct, move the float by 4 bytes as well. - new_data[pos:pos+2] = b'\0\0\0\0' - pos += 2 - new_data[pos:pos+2] = source.data[source_pos:source_pos+2] - moved_late_rodata[source_pos] = pos - last_rodata_pos = pos + 2 - source_pos += 2 - if jtbl_rodata_size > 0: - assert dummy_bytes_list, "should always have dummy bytes before jtbl data" - pos = last_rodata_pos - new_data[pos : pos + jtbl_rodata_size] = \ - source.data[source_pos : source_pos + jtbl_rodata_size] - for i in range(0, jtbl_rodata_size, 2): - moved_late_rodata[source_pos + i] = pos + i - jtbl_rodata_positions.add(pos + i) - last_rodata_pos += jtbl_rodata_size - source_pos += jtbl_rodata_size - target.data = bytes(new_data) - - # Merge strtab data. - strtab_adj = len(objfile.symtab.strtab.data) - objfile.symtab.strtab.data += asm_objfile.symtab.strtab.data - - # Find relocated symbols - relocated_symbols = set() - for obj in [asm_objfile, objfile]: - for sec in obj.sections: - for reltab in sec.relocated_by: - for rel in reltab.relocations: - relocated_symbols.add(obj.symtab.symbol_entries[rel.sym_index]) - - # Move over symbols, deleting the temporary function labels. - # Sometimes this naive procedure results in duplicate symbols, or UNDEF - # symbols that are also defined the same .o file. Hopefully that's fine. - # Skip over local symbols that aren't used relocated against, to avoid - # conflicts. - new_local_syms = [s for s in objfile.symtab.local_symbols() if not is_temp_name(s.name)] - new_global_syms = [s for s in objfile.symtab.global_symbols() if not is_temp_name(s.name)] - n_text = 0 - for i, s in enumerate(asm_objfile.symtab.symbol_entries): - is_local = (i < asm_objfile.symtab.sh_info) - if is_local and s not in relocated_symbols: - continue - if is_temp_name(s.name): - continue - if s.st_shndx not in [SHN_UNDEF, SHN_ABS]: - section_name = asm_objfile.sections[s.st_shndx].name - if section_name not in SECTIONS: - raise Failure("generated assembly .o must only have symbols for .text, .data, .rodata, .sdata, .sdata2, .sbss, ABS and UNDEF, but found " + section_name) - if section_name == '.sbss2': #! I'm not sure why this isn't working - continue - obj_func_name = convert_func_name(s.name, to_copy) - obj_n_text = objfile.text_section_index(obj_func_name) - s.st_shndx = objfile.find_section(section_name, obj_n_text if section_name == '.text' else 0).index - if section_name == '.text': - n_text += 1 - # glabel's aren't marked as functions, making objdump output confusing. Fix that. - if s.name in all_text_glabels: - s.type = STT_FUNC - if objfile.sections[s.st_shndx].name == '.rodata' and s.st_value in moved_late_rodata: - s.st_value = moved_late_rodata[s.st_value] - s.st_name += strtab_adj - if is_local: - new_local_syms.append(s) - else: - new_global_syms.append(s) - new_syms = new_local_syms + new_global_syms - for i, s in enumerate(new_syms): - s.new_index = i - objfile.symtab.data = b''.join(s.to_bin() for s in new_syms) - objfile.symtab.sh_info = len(new_local_syms) - - # Move over relocations - n_text = 0 - for sec in objfile.sections: - sectype = sec.name - # This should work as long as you NONMATCH whole functions rather than asm fragments - target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) - - if target is not None: - # fixup relocation symbol indices, since we butchered them above - for reltab in target.relocated_by: - nrels = [] - for rel in reltab.relocations: - if (sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions) or sectype == ".sbss2": - # don't include relocations for late_rodata dummy code - continue - # hopefully we don't have relocations for local or - # temporary symbols, so new_index exists - rel.sym_index = objfile.symtab.symbol_entries[rel.sym_index].new_index - nrels.append(rel) - reltab.relocations = nrels - reltab.data = b''.join(rel.to_bin() for rel in nrels) - - if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: - if sectype == '.text': - n_text += 1 - continue - - func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2] - asm_n_text = asm_objfile.text_section_index(func + '_asm_start') - source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) - if not source: - if sectype == '.text': - n_text += 1 - continue - - target_reltab = objfile.find_section('.rel' + sectype, n_text if sectype == '.text' else 0) - target_reltaba = objfile.find_section('.rela' + sectype, n_text if sectype == '.text' else 0) - for reltab in source.relocated_by: - for rel in reltab.relocations: - rel.sym_index = asm_objfile.symtab.symbol_entries[rel.sym_index].new_index - # I suspect that this is requried for matching. If the after linking the - # binary doesn't match, retry after commenting out the following line: - rel.r_addend = 0 - if sectype == '.rodata' and rel.r_offset in moved_late_rodata: - rel.r_offset = moved_late_rodata[rel.r_offset] - new_data = b''.join(rel.to_bin() for rel in reltab.relocations) - if reltab.sh_type == SHT_REL: - target_reltab = objfile.add_section('.rel' + sectype, - sh_type=SHT_REL, sh_flags=0, - sh_link=objfile.symtab.index, sh_info=target.index, - sh_addralign=4, sh_entsize=8, data=b'') - target_reltab.data += new_data - else: - # Always append as a separate .rela.text section - target_reltaba = objfile.add_section('.rela' + sectype, - sh_type=SHT_RELA, sh_flags=0, - sh_link=objfile.symtab.index, sh_info=target.index, - sh_addralign=4, sh_entsize=12, data=b'') - target_reltaba.data += new_data - if sectype == '.text': - n_text += 1 - - objfile.write(objfile_name) - finally: - s_file.close() - #os.remove(s_name) - try: - pass - #os.remove(o_name) - except: - pass - -def run_wrapped(argv, outfile): - parser = argparse.ArgumentParser(description="Pre-process .c files and post-process .o files to enable embedding assembly into C.") - parser.add_argument('filename', help="path to .c code") - parser.add_argument('--post-process', dest='objfile', help="path to .o file to post-process") - parser.add_argument('--assembler', dest='assembler', help="assembler command (e.g. \"mips-linux-gnu-as -march=vr4300 -mabi=32\")") - parser.add_argument('--asm-prelude', dest='asm_prelude', help="path to a file containing a prelude to the assembly file (with .set and .macro directives, e.g.)") - parser.add_argument('--input-enc', default='latin1', help="Input encoding (default: latin1)") - parser.add_argument('--output-enc', default='latin1', help="Output encoding (default: latin1)") - parser.add_argument('-framepointer', dest='framepointer', action='store_true') - parser.add_argument('-g3', dest='g3', action='store_true') - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument('-O1', dest='opt', action='store_const', const='O1') - group.add_argument('-O2', dest='opt', action='store_const', const='O2') - group.add_argument('-g', dest='opt', action='store_const', const='g') - args = parser.parse_args(argv) - opt = args.opt - if args.g3: - if opt != 'O2': - raise Failure("-g3 is only supported together with -O2") - opt = 'g3' - - if args.objfile is None: - with open(args.filename, encoding=args.input_enc) as f: - parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc, print_source=outfile) - else: - if args.assembler is None: - raise Failure("must pass assembler command") - with open(args.filename, encoding=args.input_enc) as f: - functions = parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc) - if not functions: - return - asm_prelude = b'' - if args.asm_prelude: - with open(args.asm_prelude, 'rb') as f: - asm_prelude = f.read() - fixup_objfile(args.objfile, functions, asm_prelude, args.assembler, args.output_enc) - -def run(argv, outfile=sys.stdout.buffer): - try: - run_wrapped(argv, outfile) - except Failure as e: - sys.exit(1) - -if __name__ == "__main__": - run(sys.argv[1:]) +#!/usr/bin/env python3 +import argparse +import tempfile +import struct +import copy +import sys +import re +import os +from collections import namedtuple, defaultdict +from io import StringIO + +MAX_FN_SIZE = 100 +SLOW_CHECKS = False + +EI_NIDENT = 16 +EI_CLASS = 4 +EI_DATA = 5 +EI_VERSION = 6 +EI_OSABI = 7 +EI_ABIVERSION = 8 +STN_UNDEF = 0 + +SHN_UNDEF = 0 +SHN_ABS = 0xfff1 +SHN_COMMON = 0xfff2 +SHN_XINDEX = 0xffff +SHN_LORESERVE = 0xff00 + +STT_NOTYPE = 0 +STT_OBJECT = 1 +STT_FUNC = 2 +STT_SECTION = 3 +STT_FILE = 4 +STT_COMMON = 5 +STT_TLS = 6 + +STB_LOCAL = 0 +STB_GLOBAL = 1 +STB_WEAK = 2 + +STV_DEFAULT = 0 +STV_INTERNAL = 1 +STV_HIDDEN = 2 +STV_PROTECTED = 3 + +SHT_NULL = 0 +SHT_PROGBITS = 1 +SHT_SYMTAB = 2 +SHT_STRTAB = 3 +SHT_RELA = 4 +SHT_HASH = 5 +SHT_DYNAMIC = 6 +SHT_NOTE = 7 +SHT_NOBITS = 8 +SHT_REL = 9 +SHT_SHLIB = 10 +SHT_DYNSYM = 11 +SHT_INIT_ARRAY = 14 +SHT_FINI_ARRAY = 15 +SHT_PREINIT_ARRAY = 16 +SHT_GROUP = 17 +SHT_SYMTAB_SHNDX = 18 +SHT_MIPS_GPTAB = 0x70000003 +SHT_MIPS_DEBUG = 0x70000005 +SHT_MIPS_REGINFO = 0x70000006 +SHT_MIPS_OPTIONS = 0x7000000d + +SHF_WRITE = 0x1 +SHF_ALLOC = 0x2 +SHF_EXECINSTR = 0x4 +SHF_MERGE = 0x10 +SHF_STRINGS = 0x20 +SHF_INFO_LINK = 0x40 +SHF_LINK_ORDER = 0x80 +SHF_OS_NONCONFORMING = 0x100 +SHF_GROUP = 0x200 +SHF_TLS = 0x400 + +R_MIPS_32 = 2 +R_MIPS_26 = 4 +R_MIPS_HI16 = 5 +R_MIPS_LO16 = 6 + + +class ElfHeader: + """ + typedef struct { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; + } Elf32_Ehdr; + """ + + def __init__(self, data): + self.e_ident = data[:EI_NIDENT] + self.e_type, self.e_machine, self.e_version, self.e_entry, self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize, self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack('> 4 + self.type = st_info & 15 + self.name = strtab.lookup_str(self.st_name) + self.visibility = self.st_other & 3 + + def to_bin(self): + st_info = (self.bind << 4) | self.type + return struct.pack('> 8 + self.rel_type = self.r_info & 0xff + + def to_bin(self): + self.r_info = (self.sym_index << 8) | self.rel_type + if self.sh_type == SHT_REL: + return struct.pack(' 0: + # Generate late rodata by emitting unique float constants. + # This requires 3 instructions for each 4 bytes of rodata. + # If we know alignment, we can use doubles, which give 3 + # instructions for 8 bytes of rodata. + size = self.fn_section_sizes['.late_rodata'] // 2 + skip_next = False + needs_double = (self.late_rodata_alignment != 0) + for i in range(size): + if skip_next: + skip_next = False + continue + # Jump tables give 9 instructions for >= 5 words of rodata, and should be + # emitted when: + # - -O2 or -O2 -g3 are used, which give the right codegen + # - we have emitted our first .float/.double (to ensure that we find the + # created rodata in the binary) + # - we have emitted our first .double, if any (to ensure alignment of doubles + # in shifted rodata sections) + # - we have at least 5 words of rodata left to emit (otherwise IDO does not + # generate a jump table) + # - we have at least 10 more instructions to go in this function (otherwise our + # function size computation will be wrong since the delay slot goes unused) + if (not needs_double and state.use_jtbl_for_rodata and i >= 1 and + size - i >= 5 and num_instr - len(late_rodata_fn_output) >= 10): + cases = " ".join("case {}:".format(case) for case in range(size - i)) + late_rodata_fn_output.append("switch (*(volatile int*)0) { " + cases + " ; }") + late_rodata_fn_output.extend([""] * 8) + jtbl_rodata_size = (size - i) * 4 + break + dummy_bytes = state.next_late_rodata_hex() + late_rodata_dummy_bytes.append(dummy_bytes) + if self.late_rodata_alignment == 4 * ((i + 1) % 2 + 1) and i + 1 < size: + dummy_bytes2 = state.next_late_rodata_hex() + late_rodata_dummy_bytes.append(dummy_bytes2) + fval, = struct.unpack(' 0 or late_rodata_fn_output: + text_name = state.make_name('func') + src[0] = 'int {}(void) {{ return '.format(text_name) + instr_count = self.fn_section_sizes['.text'] // 2 + src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count > 1 else '; }; ' + if instr_count < state.min_instr_count: + self.fail("too short .text block") + tot_emitted = 0 + tot_skipped = 0 + fn_emitted = 0 + fn_skipped = 0 + rodata_stack = late_rodata_fn_output[::-1] + for (line, count) in self.fn_ins_inds: + for _ in range(count): + if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and + (not rodata_stack or rodata_stack[-1])): + # Don't let functions become too large. When a function reaches 284 + # instructions, and -O2 -framepointer flags are passed, the IRIX + # compiler decides it is a great idea to start optimizing more. + fn_emitted = 0 + fn_skipped = 0 + src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func')) + if fn_skipped < state.skip_instr_count: + fn_skipped += 1 + tot_skipped += 1 + elif rodata_stack: + src[line] += rodata_stack.pop() + else: + src[line] += '*(int *)' + tot_emitted += 1 + fn_emitted += 1 + if rodata_stack: + size = len(late_rodata_fn_output) // 3 + available = instr_count - tot_skipped + self.fail( + "late rodata to text ratio is too high: {} / {} must be <= 1/3\n" + "add .late_rodata_alignment (4|8) to the .late_rodata " + "block to double the allowed ratio." + .format(size, available)) + + init_name = None + if self.fn_section_sizes['.init'] > 0 or late_rodata_fn_output: + init_name = state.make_name('func') + src[0] = 'int {}(void) {{ return '.format(init_name) + instr_count = self.fn_section_sizes['.init'] // 2 + src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count else '; }; ' + if instr_count < state.min_instr_count: + self.fail("too short .init block") + tot_emitted = 0 + tot_skipped = 0 + fn_emitted = 0 + fn_skipped = 0 + rodata_stack = late_rodata_fn_output[::-1] + for (line, count) in self.fn_ins_inds: + for _ in range(count): + if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and + (not rodata_stack or rodata_stack[-1])): + # Don't let functions become too large. When a function reaches 284 + # instructions, and -O2 -framepointer flags are passed, the IRIX + # compiler decides it is a great idea to start optimizing more. + fn_emitted = 0 + fn_skipped = 0 + src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func')) + if fn_skipped < state.skip_instr_count: + fn_skipped += 1 + tot_skipped += 1 + elif rodata_stack: + src[line] += rodata_stack.pop() + else: + src[line] += '*(int *)' + tot_emitted += 1 + fn_emitted += 1 + if rodata_stack: + size = len(late_rodata_fn_output) // 3 + available = instr_count - tot_skipped + self.fail( + "late rodata to init ratio is too high: {} / {} must be <= 1/3\n" + "add .late_rodata_alignment (4|8) to the .late_rodata " + "block to double the allowed ratio." + .format(size, available)) + + rodata_name = None + if self.fn_section_sizes['.rodata'] > 0: + rodata_name = state.make_name('rodata') + src[self.num_lines] += f" const char {rodata_name}[{self.fn_section_sizes['.rodata']}] = {{1}};" + + data_name = None + if self.fn_section_sizes['.data'] > 0: + data_name = state.make_name('data') + src[self.num_lines] += f" char {data_name}[{self.fn_section_sizes['.data']}] = {{1}};" + + bss_name = None + if self.fn_section_sizes['.bss'] > 0: + bss_name = state.make_name('bss') + src[self.num_lines] += f" char {bss_name}[{self.fn_section_sizes['.bss']}];" + + sdata_name = None # sdata is like data but small + if self.fn_section_sizes['.sdata'] > 0: + sdata_code = "" + for i in range(self.fn_section_sizes['.sdata']): + sdata_name = state.make_name('sdata') + sdata_code += f" char {sdata_name} = 1;" + src[self.num_lines] += sdata_code + + sdata2_name = None # sdata2 is like rodata but small + if self.fn_section_sizes['.sdata2'] > 0: + sdata2_code = "" + for i in range(self.fn_section_sizes['.sdata2']): + sdata2_name = state.make_name('sdata2') + sdata2_code += f" const char {sdata2_name} = 1;" + src[self.num_lines] += sdata2_code + + sbss_name = None # Similarly, sbss is like uninitialized data but small + if self.fn_section_sizes['.sbss'] > 0: + sbss_code = "" + for i in range(self.fn_section_sizes['.sbss']): + sbss_name = state.make_name('sbss') + sbss_code += f" char {sbss_name};" + src[self.num_lines] += sbss_code + + """ sbss2 is currently borked + sbss2_name = None # Similarly, sbss2 is like uninitialized rodata but small + if self.fn_section_sizes['.sbss2'] > 0: + sbss2_code = "" + for i in range(self.fn_section_sizes['.sbss2']): + sbss2_name = state.make_name('sbss2') + sbss2_code += f" const char {sbss2_name};" + src[self.num_lines] += sbss2_code + """ + + fn = Function( + text_glabels=self.text_glabels, + asm_conts=self.asm_conts, + late_rodata_dummy_bytes=late_rodata_dummy_bytes, + jtbl_rodata_size=jtbl_rodata_size, + late_rodata_asm_conts=self.late_rodata_asm_conts, + fn_desc=self.fn_desc, + data={ + '.text': (text_name, self.fn_section_sizes['.text']), + '.data': (data_name, self.fn_section_sizes['.data']), + '.rodata': (rodata_name, self.fn_section_sizes['.rodata']), + '.bss': (bss_name, self.fn_section_sizes['.bss']), + '.sdata': (sdata_name, self.fn_section_sizes['.sdata']), + '.sdata2': (sdata2_name, self.fn_section_sizes['.sdata2']), + '.sbss': (sbss_name, self.fn_section_sizes['.sbss']), + #'.sbss2': (sbss2_name, self.fn_section_sizes['.sbss2']), + }) + return src, fn + +cutscene_data_regexpr = re.compile(r"CutsceneData (.|\n)*\[\] = {") +float_regexpr = re.compile(r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f") + +def repl_float_hex(m): + return str(struct.unpack(" 0 + n_text = objfile.text_section_index(temp_name) + loc = objfile.symtab.find_symbol(temp_name) + if loc is None: + ifdefed = True + break + loc = loc[1] + prev_loc = prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] + if loc < prev_loc: + raise Failure("Wrongly computed size for section {} (diff {}). This is an asm-processor bug!".format(sectype + (str(n_text) if sectype == '.text' else ''), prev_loc- loc)) + if loc != prev_loc: + asm.append('.section ' + sectype) + if sectype == '.text': + for i in range((loc - prev_loc) // 2): + asm.append('nop') + else: + asm.append('.space {}'.format(loc - prev_loc)) + to_copy[sectype + (str(n_text) if sectype == '.text' else '')].append((loc, size, temp_name, function.fn_desc, function.text_glabels[0])) + prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] = loc + size + if not ifdefed: + all_text_glabels.update(function.text_glabels) + all_late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes) + all_jtbl_rodata_size.append(function.jtbl_rodata_size) + late_rodata_asm.append(function.late_rodata_asm_conts) + for sectype, (temp_name, size) in function.data.items(): + if temp_name is not None: + asm.append('.section ' + sectype) + asm.append('glabel ' + temp_name + '_asm_start') + asm.append('.section .text') + for line in function.asm_conts: + asm.append(line) + for sectype, (temp_name, size) in function.data.items(): + if temp_name is not None: + #asm.append('.section ' + sectype) + asm.append('glabel ' + temp_name + '_asm_end') + + if any(late_rodata_asm): + late_rodata_source_name_start = '_asmpp_late_rodata_start' + late_rodata_source_name_end = '_asmpp_late_rodata_end' + asm.append('.rdata') + asm.append('glabel {}'.format(late_rodata_source_name_start)) + for conts in late_rodata_asm: + asm.extend(conts) + asm.append('glabel {}'.format(late_rodata_source_name_end)) + + o_file = open("asm_processor_temp.o", 'w').close() # Create temp file. tempfile module isn't working for me. + o_name = "asm_processor_temp.o" + + s_file = open("asm_processor_temp.s", 'wb') # Ditto. + s_name = "asm_processor_temp.s" + try: + s_file.write(asm_prelude + b'\n') + for line in asm: + s_file.write(line.encode(output_enc) + b'\n') + s_file.close() + ret = os.system(assembler + " " + s_name + " -o " + o_name) + if ret != 0: + raise Failure("failed to assemble") + with open(o_name, 'rb') as f: + asm_objfile = ElfFile(f.read()) + + # Remove some clutter from objdump output + objfile.drop_irrelevant_sections() + + """ + # Unify reginfo sections + target_reginfo = objfile.find_section('.reginfo') + source_reginfo_data = list(asm_objfile.find_section('.reginfo').data) + data = list(target_reginfo.data) + for i in range(20): + data[i] |= source_reginfo_data[i] + target_reginfo.data = bytes(data) + """ + + # Move over section contents + modified_text_positions = set() + jtbl_rodata_positions = set() + last_rodata_pos = 0 + n_text = 0 + for sec in objfile.sections: + sectype = sec.name + if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: + if sectype == '.text': + n_text += 1 + continue + # This should work as long as you NONMATCH whole functions rather than asm fragments + func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2] + asm_n_text = asm_objfile.text_section_index(func + '_asm_start') + source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) + assert source is not None, "didn't find source section: " + sectype + for (pos, count, temp_name, fn_desc, fn_name) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: + loc1 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_start', source) + loc2 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_end', source) + assert loc1 == pos, "assembly and C files don't line up for section " + sectype + ", " + fn_desc + # Since we are nonmatching whole functions, we don't need to insert the correct + # amount of padding into the src file. We don't actually need to insert padding + # at all. We can just plop the asm's text section into the objfile. + # if loc2 - loc1 != count: + # raise Failure("incorrectly computed size for section " + sectype + ", " + fn_desc + ". If using .double, make sure to provide explicit alignment padding.") + if sectype == '.bss' or sectype == '.sbss2': + continue + target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) + assert target is not None, "missing target section of type " + sectype + data = list(target.data) + for (pos, count, _, _, _) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: + # mwasmarm 4-aligns text sections, so make sure to copy exactly `count` bytes + data[pos:pos + count] = source.data[pos:pos + count] + if sectype == '.text': + assert count % 2 == 0 + assert pos % 2 == 0 + for i in range(count // 2): + modified_text_positions.add(pos + 2 * i) + elif sectype == '.rodata': + last_rodata_pos = pos + count + target.data = bytes(data) + if sectype == '.text': + n_text += 1 + + # Move over late rodata. This is heuristic, sadly, since I can't think + # of another way of doing it. + moved_late_rodata = {} + if any(all_late_rodata_dummy_bytes) or any(all_jtbl_rodata_size): + source = asm_objfile.find_section('.rodata', 0) + target = objfile.find_section('.rodata', 0) + source_pos = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_start, source) + source_end = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_end, source) + if source_end - source_pos != sum(map(len, all_late_rodata_dummy_bytes)) * 2 + sum(all_jtbl_rodata_size): + raise Failure("computed wrong size of .late_rodata") + new_data = list(target.data) + for dummy_bytes_list, jtbl_rodata_size in zip(all_late_rodata_dummy_bytes, all_jtbl_rodata_size): + for index, dummy_bytes in enumerate(dummy_bytes_list): + pos = target.data.index(dummy_bytes, last_rodata_pos) + # This check is nice, but makes time complexity worse for large files: + if SLOW_CHECKS and target.data.find(dummy_bytes, pos + 2) != -1: + raise Failure("multiple occurrences of late_rodata hex magic. Change asm-processor to use something better than 0xE0123456!") + if index == 0 and len(dummy_bytes_list) > 1 and target.data[pos+2:pos+8] == b'\0\0\0\0': + # Ugly hack to handle double alignment for non-matching builds. + # We were told by .late_rodata_alignment (or deduced from a .double) + # that a function's late_rodata started out 4 (mod 8), and emitted + # a float and then a double. But it was actually 0 (mod 8), so our + # double was moved by 4 bytes. To make them adjacent to keep jump + # tables correct, move the float by 4 bytes as well. + new_data[pos:pos+2] = b'\0\0\0\0' + pos += 2 + new_data[pos:pos+2] = source.data[source_pos:source_pos+2] + moved_late_rodata[source_pos] = pos + last_rodata_pos = pos + 2 + source_pos += 2 + if jtbl_rodata_size > 0: + assert dummy_bytes_list, "should always have dummy bytes before jtbl data" + pos = last_rodata_pos + new_data[pos : pos + jtbl_rodata_size] = \ + source.data[source_pos : source_pos + jtbl_rodata_size] + for i in range(0, jtbl_rodata_size, 2): + moved_late_rodata[source_pos + i] = pos + i + jtbl_rodata_positions.add(pos + i) + last_rodata_pos += jtbl_rodata_size + source_pos += jtbl_rodata_size + target.data = bytes(new_data) + + # Merge strtab data. + strtab_adj = len(objfile.symtab.strtab.data) + objfile.symtab.strtab.data += asm_objfile.symtab.strtab.data + + # Find relocated symbols + relocated_symbols = set() + for obj in [asm_objfile, objfile]: + for sec in obj.sections: + for reltab in sec.relocated_by: + for rel in reltab.relocations: + relocated_symbols.add(obj.symtab.symbol_entries[rel.sym_index]) + + # Move over symbols, deleting the temporary function labels. + # Sometimes this naive procedure results in duplicate symbols, or UNDEF + # symbols that are also defined the same .o file. Hopefully that's fine. + # Skip over local symbols that aren't used relocated against, to avoid + # conflicts. + new_local_syms = [s for s in objfile.symtab.local_symbols() if not is_temp_name(s.name)] + new_global_syms = [s for s in objfile.symtab.global_symbols() if not is_temp_name(s.name)] + n_text = 0 + for i, s in enumerate(asm_objfile.symtab.symbol_entries): + is_local = (i < asm_objfile.symtab.sh_info) + if is_local and s not in relocated_symbols: + continue + if is_temp_name(s.name): + continue + if s.st_shndx not in [SHN_UNDEF, SHN_ABS]: + section_name = asm_objfile.sections[s.st_shndx].name + if section_name not in SECTIONS: + raise Failure("generated assembly .o must only have symbols for .text, .data, .rodata, .sdata, .sdata2, .sbss, ABS and UNDEF, but found " + section_name) + if section_name == '.sbss2': #! I'm not sure why this isn't working + continue + obj_func_name = convert_func_name(s.name, to_copy) + obj_n_text = objfile.text_section_index(obj_func_name) + s.st_shndx = objfile.find_section(section_name, obj_n_text if section_name == '.text' else 0).index + if section_name == '.text': + n_text += 1 + # glabel's aren't marked as functions, making objdump output confusing. Fix that. + if s.name in all_text_glabels: + s.type = STT_FUNC + if objfile.sections[s.st_shndx].name == '.rodata' and s.st_value in moved_late_rodata: + s.st_value = moved_late_rodata[s.st_value] + s.st_name += strtab_adj + if is_local: + new_local_syms.append(s) + else: + new_global_syms.append(s) + new_syms = new_local_syms + new_global_syms + for i, s in enumerate(new_syms): + s.new_index = i + objfile.symtab.data = b''.join(s.to_bin() for s in new_syms) + objfile.symtab.sh_info = len(new_local_syms) + + # Move over relocations + n_text = 0 + for sec in objfile.sections: + sectype = sec.name + # This should work as long as you NONMATCH whole functions rather than asm fragments + target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) + + if target is not None: + # fixup relocation symbol indices, since we butchered them above + for reltab in target.relocated_by: + nrels = [] + for rel in reltab.relocations: + if (sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions) or sectype == ".sbss2": + # don't include relocations for late_rodata dummy code + continue + # hopefully we don't have relocations for local or + # temporary symbols, so new_index exists + rel.sym_index = objfile.symtab.symbol_entries[rel.sym_index].new_index + nrels.append(rel) + reltab.relocations = nrels + reltab.data = b''.join(rel.to_bin() for rel in nrels) + + if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: + if sectype == '.text': + n_text += 1 + continue + + func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2] + asm_n_text = asm_objfile.text_section_index(func + '_asm_start') + source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) + if not source: + if sectype == '.text': + n_text += 1 + continue + + target_reltab = objfile.find_section('.rel' + sectype, n_text if sectype == '.text' else 0) + target_reltaba = objfile.find_section('.rela' + sectype, n_text if sectype == '.text' else 0) + for reltab in source.relocated_by: + for rel in reltab.relocations: + rel.sym_index = asm_objfile.symtab.symbol_entries[rel.sym_index].new_index + # I suspect that this is requried for matching. If the after linking the + # binary doesn't match, retry after commenting out the following line: + rel.r_addend = 0 + if sectype == '.rodata' and rel.r_offset in moved_late_rodata: + rel.r_offset = moved_late_rodata[rel.r_offset] + new_data = b''.join(rel.to_bin() for rel in reltab.relocations) + if reltab.sh_type == SHT_REL: + target_reltab = objfile.add_section('.rel' + sectype, + sh_type=SHT_REL, sh_flags=0, + sh_link=objfile.symtab.index, sh_info=target.index, + sh_addralign=4, sh_entsize=8, data=b'') + target_reltab.data += new_data + else: + # Always append as a separate .rela.text section + target_reltaba = objfile.add_section('.rela' + sectype, + sh_type=SHT_RELA, sh_flags=0, + sh_link=objfile.symtab.index, sh_info=target.index, + sh_addralign=4, sh_entsize=12, data=b'') + target_reltaba.data += new_data + if sectype == '.text': + n_text += 1 + + objfile.write(objfile_name) + finally: + s_file.close() + #os.remove(s_name) + try: + pass + #os.remove(o_name) + except: + pass + +def run_wrapped(argv, outfile): + parser = argparse.ArgumentParser(description="Pre-process .c files and post-process .o files to enable embedding assembly into C.") + parser.add_argument('filename', help="path to .c code") + parser.add_argument('--post-process', dest='objfile', help="path to .o file to post-process") + parser.add_argument('--assembler', dest='assembler', help="assembler command (e.g. \"mips-linux-gnu-as -march=vr4300 -mabi=32\")") + parser.add_argument('--asm-prelude', dest='asm_prelude', help="path to a file containing a prelude to the assembly file (with .set and .macro directives, e.g.)") + parser.add_argument('--input-enc', default='latin1', help="Input encoding (default: latin1)") + parser.add_argument('--output-enc', default='latin1', help="Output encoding (default: latin1)") + parser.add_argument('-framepointer', dest='framepointer', action='store_true') + parser.add_argument('-g3', dest='g3', action='store_true') + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument('-O1', dest='opt', action='store_const', const='O1') + group.add_argument('-O2', dest='opt', action='store_const', const='O2') + group.add_argument('-g', dest='opt', action='store_const', const='g') + args = parser.parse_args(argv) + opt = args.opt + if args.g3: + if opt != 'O2': + raise Failure("-g3 is only supported together with -O2") + opt = 'g3' + + if args.objfile is None: + with open(args.filename, encoding=args.input_enc) as f: + parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc, print_source=outfile) + else: + if args.assembler is None: + raise Failure("must pass assembler command") + with open(args.filename, encoding=args.input_enc) as f: + functions = parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc) + if not functions: + return + asm_prelude = b'' + if args.asm_prelude: + with open(args.asm_prelude, 'rb') as f: + asm_prelude = f.read() + fixup_objfile(args.objfile, functions, asm_prelude, args.assembler, args.output_enc) + +def run(argv, outfile=sys.stdout.buffer): + try: + run_wrapped(argv, outfile) + except Failure as e: + sys.exit(1) + +if __name__ == "__main__": + run(sys.argv[1:]) -- cgit v1.2.3