diff options
| author | who-knows-who <j.williams97@outlook.com> | 2021-04-17 12:48:19 +0100 | 
|---|---|---|
| committer | who-knows-who <j.williams97@outlook.com> | 2021-04-17 12:48:19 +0100 | 
| commit | 996d9d78106cf4ab601815550ba77a92ab678328 (patch) | |
| tree | 9a92b4f792a11466cdee719b18ed449dee2e27fc /tools/asm_processor | |
| parent | 267cb812e827604d4829d3afe28a82b0970d3706 (diff) | |
| parent | 85a8a2bd43633e11af094d66a35f3c32dc7c8bfe (diff) | |
Merge branch 'master' into 0202A1E0
Diffstat (limited to 'tools/asm_processor')
| -rwxr-xr-x[-rw-r--r--] | tools/asm_processor/asm_processor.py | 2530 | ||||
| -rwxr-xr-x[-rw-r--r--] | tools/asm_processor/compile.sh | 37 | 
2 files changed, 1293 insertions, 1274 deletions
| diff --git a/tools/asm_processor/asm_processor.py b/tools/asm_processor/asm_processor.py index e3226cb2..ec01c68e 100644..100755 --- a/tools/asm_processor/asm_processor.py +++ b/tools/asm_processor/asm_processor.py @@ -1,1261 +1,1269 @@ -#!/usr/bin/env python3
 -import argparse
 -import tempfile
 -import struct
 -import copy
 -import sys
 -import re
 -import os
 -from collections import namedtuple, defaultdict
 -from io import StringIO
 -
 -MAX_FN_SIZE = 100
 -SLOW_CHECKS = False
 -
 -EI_NIDENT     = 16
 -EI_CLASS      = 4
 -EI_DATA       = 5
 -EI_VERSION    = 6
 -EI_OSABI      = 7
 -EI_ABIVERSION = 8
 -STN_UNDEF = 0
 -
 -SHN_UNDEF     = 0
 -SHN_ABS       = 0xfff1
 -SHN_COMMON    = 0xfff2
 -SHN_XINDEX    = 0xffff
 -SHN_LORESERVE = 0xff00
 -
 -STT_NOTYPE  = 0
 -STT_OBJECT  = 1
 -STT_FUNC    = 2
 -STT_SECTION = 3
 -STT_FILE    = 4
 -STT_COMMON  = 5
 -STT_TLS     = 6
 -
 -STB_LOCAL  = 0
 -STB_GLOBAL = 1
 -STB_WEAK   = 2
 -
 -STV_DEFAULT   = 0
 -STV_INTERNAL  = 1
 -STV_HIDDEN    = 2
 -STV_PROTECTED = 3
 -
 -SHT_NULL          = 0
 -SHT_PROGBITS      = 1
 -SHT_SYMTAB        = 2
 -SHT_STRTAB        = 3
 -SHT_RELA          = 4
 -SHT_HASH          = 5
 -SHT_DYNAMIC       = 6
 -SHT_NOTE          = 7
 -SHT_NOBITS        = 8
 -SHT_REL           = 9
 -SHT_SHLIB         = 10
 -SHT_DYNSYM        = 11
 -SHT_INIT_ARRAY    = 14
 -SHT_FINI_ARRAY    = 15
 -SHT_PREINIT_ARRAY = 16
 -SHT_GROUP         = 17
 -SHT_SYMTAB_SHNDX  = 18
 -SHT_MIPS_GPTAB    = 0x70000003
 -SHT_MIPS_DEBUG    = 0x70000005
 -SHT_MIPS_REGINFO  = 0x70000006
 -SHT_MIPS_OPTIONS  = 0x7000000d
 -
 -SHF_WRITE            = 0x1
 -SHF_ALLOC            = 0x2
 -SHF_EXECINSTR        = 0x4
 -SHF_MERGE            = 0x10
 -SHF_STRINGS          = 0x20
 -SHF_INFO_LINK        = 0x40
 -SHF_LINK_ORDER       = 0x80
 -SHF_OS_NONCONFORMING = 0x100
 -SHF_GROUP            = 0x200
 -SHF_TLS              = 0x400
 -
 -R_MIPS_32   = 2
 -R_MIPS_26   = 4
 -R_MIPS_HI16 = 5
 -R_MIPS_LO16 = 6
 -
 -
 -class ElfHeader:
 -    """
 -    typedef struct {
 -        unsigned char   e_ident[EI_NIDENT];
 -        Elf32_Half      e_type;
 -        Elf32_Half      e_machine;
 -        Elf32_Word      e_version;
 -        Elf32_Addr      e_entry;
 -        Elf32_Off       e_phoff;
 -        Elf32_Off       e_shoff;
 -        Elf32_Word      e_flags;
 -        Elf32_Half      e_ehsize;
 -        Elf32_Half      e_phentsize;
 -        Elf32_Half      e_phnum;
 -        Elf32_Half      e_shentsize;
 -        Elf32_Half      e_shnum;
 -        Elf32_Half      e_shstrndx;
 -    } Elf32_Ehdr;
 -    """
 -
 -    def __init__(self, data):
 -        self.e_ident = data[:EI_NIDENT]
 -        self.e_type, self.e_machine, self.e_version, self.e_entry, self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize, self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack('<HHIIIIIHHHHHH', data[EI_NIDENT:])
 -        assert self.e_ident[EI_CLASS] == 1 # 32-bit
 -        #assert self.e_ident[EI_DATA] == 2 # big-endian
 -        #assert self.e_type == 1 # relocatable
 -        #assert self.e_machine == 8 # MIPS I Architecture
 -        assert self.e_phoff == 0 # no program header
 -        assert self.e_shoff != 0 # section header
 -        assert self.e_shstrndx != SHN_UNDEF
 -
 -    def to_bin(self):
 -        return self.e_ident + struct.pack('<HHIIIIIHHHHHH', self.e_type,
 -                self.e_machine, self.e_version, self.e_entry, self.e_phoff,
 -                self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
 -                self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx)
 -
 -
 -class Symbol:
 -    """
 -    typedef struct {
 -        Elf32_Word      st_name;
 -        Elf32_Addr      st_value;
 -        Elf32_Word      st_size;
 -        unsigned char   st_info;
 -        unsigned char   st_other;
 -        Elf32_Half      st_shndx;
 -    } Elf32_Sym;
 -    """
 -
 -    def __init__(self, data, strtab):
 -        self.st_name, self.st_value, self.st_size, st_info, self.st_other, self.st_shndx = struct.unpack('<IIIBBH', data)
 -        assert self.st_shndx != SHN_XINDEX, "too many sections (SHN_XINDEX not supported)"
 -        self.bind = st_info >> 4
 -        self.type = st_info & 15
 -        self.name = strtab.lookup_str(self.st_name)
 -        self.visibility = self.st_other & 3
 -
 -    def to_bin(self):
 -        st_info = (self.bind << 4) | self.type
 -        return struct.pack('<IIIBBH', self.st_name, self.st_value, self.st_size, st_info, self.st_other, self.st_shndx)
 -
 -
 -class Relocation:
 -    def __init__(self, data, sh_type):
 -        self.sh_type = sh_type
 -        if sh_type == SHT_REL:
 -            self.r_offset, self.r_info = struct.unpack('<II', data)
 -        else:
 -            self.r_offset, self.r_info, self.r_addend = struct.unpack('<III', data)
 -        self.sym_index = self.r_info >> 8
 -        self.rel_type = self.r_info & 0xff
 -
 -    def to_bin(self):
 -        self.r_info = (self.sym_index << 8) | self.rel_type
 -        if self.sh_type == SHT_REL:
 -            return struct.pack('<II', self.r_offset, self.r_info)
 -        else:
 -            return struct.pack('<III', self.r_offset, self.r_info, self.r_addend)
 -
 -class Section:
 -    """
 -    typedef struct {
 -        Elf32_Word   sh_name;
 -        Elf32_Word   sh_type;
 -        Elf32_Word   sh_flags;
 -        Elf32_Addr   sh_addr;
 -        Elf32_Off    sh_offset;
 -        Elf32_Word   sh_size;
 -        Elf32_Word   sh_link;
 -        Elf32_Word   sh_info;
 -        Elf32_Word   sh_addralign;
 -        Elf32_Word   sh_entsize;
 -    } Elf32_Shdr;
 -    """
 -
 -    def __init__(self, header, data, index):
 -        self.sh_name, self.sh_type, self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size, self.sh_link, self.sh_info, self.sh_addralign, self.sh_entsize = struct.unpack('<IIIIIIIIII', header)
 -        assert not self.sh_flags & SHF_LINK_ORDER
 -        if self.sh_entsize != 0:
 -            assert self.sh_size % self.sh_entsize == 0
 -        if self.sh_type == SHT_NOBITS:
 -            self.data = ''
 -        else:
 -            self.data = data[self.sh_offset:self.sh_offset + self.sh_size]
 -        self.index = index
 -        self.relocated_by = []
 -
 -    @staticmethod
 -    def from_parts(sh_name, sh_type, sh_flags, sh_link, sh_info, sh_addralign, sh_entsize, data, index):
 -        header = struct.pack('<IIIIIIIIII', sh_name, sh_type, sh_flags, 0, 0, len(data), sh_link, sh_info, sh_addralign, sh_entsize)
 -        return Section(header, data, index)
 -
 -    def lookup_str(self, index):
 -        assert self.sh_type == SHT_STRTAB
 -        to = self.data.find(b'\0', index)
 -        assert to != -1
 -        return self.data[index:to].decode('latin1')
 -
 -    def add_str(self, string):
 -        assert self.sh_type == SHT_STRTAB
 -        ret = len(self.data)
 -        self.data += string.encode('latin1') + b'\0'
 -        return ret
 -
 -    def is_rel(self):
 -        return self.sh_type == SHT_REL or self.sh_type == SHT_RELA
 -
 -    def header_to_bin(self):
 -        if self.sh_type != SHT_NOBITS:
 -            self.sh_size = len(self.data)
 -        return struct.pack('<IIIIIIIIII', self.sh_name, self.sh_type, self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size, self.sh_link, self.sh_info, self.sh_addralign, self.sh_entsize)
 -
 -    def late_init(self, sections):
 -        if self.sh_type == SHT_SYMTAB:
 -            self.init_symbols(sections)
 -        elif self.is_rel():
 -            self.rel_target = sections[self.sh_info]
 -            self.rel_target.relocated_by.append(self)
 -            self.init_relocs()
 -
 -    def find_symbol(self, name):
 -        assert self.sh_type == SHT_SYMTAB
 -        for s in self.symbol_entries:
 -            if s.name == name:
 -                return (s.st_shndx, s.st_value)
 -        return None
 -
 -    def find_symbol_in_section(self, name, section):
 -        pos = self.find_symbol(name)
 -        assert pos is not None
 -        assert pos[0] == section.index
 -        return pos[1]
 -
 -    def init_symbols(self, sections):
 -        assert self.sh_type == SHT_SYMTAB
 -        assert self.sh_entsize == 16
 -        self.strtab = sections[self.sh_link]
 -        entries = []
 -        for i in range(0, self.sh_size, self.sh_entsize):
 -            entries.append(Symbol(self.data[i:i+self.sh_entsize], self.strtab))
 -        self.symbol_entries = entries
 -
 -    def init_relocs(self):
 -        assert self.is_rel()
 -        entries = []
 -        for i in range(0, self.sh_size, self.sh_entsize):
 -            entries.append(Relocation(self.data[i:i+self.sh_entsize], self.sh_type))
 -        self.relocations = entries
 -
 -    def local_symbols(self):
 -        assert self.sh_type == SHT_SYMTAB
 -        return self.symbol_entries[:self.sh_info]
 -
 -    def global_symbols(self):
 -        assert self.sh_type == SHT_SYMTAB
 -        return self.symbol_entries[self.sh_info:]
 -
 -
 -class ElfFile:
 -    def __init__(self, data):
 -        self.data = data
 -        assert data[:4] == b'\x7fELF', "not an ELF file"
 -
 -        self.elf_header = ElfHeader(data[0:52])
 -
 -        offset, size = self.elf_header.e_shoff, self.elf_header.e_shentsize
 -        null_section = Section(data[offset:offset + size], data, 0)
 -        num_sections = self.elf_header.e_shnum or null_section.sh_size
 -
 -        self.sections = [null_section]
 -        for i in range(1, num_sections):
 -            ind = offset + i * size
 -            self.sections.append(Section(data[ind:ind + size], data, i))
 -
 -        symtab = None
 -        for s in self.sections:
 -            if s.sh_type == SHT_SYMTAB:
 -                assert not symtab
 -                symtab = s
 -        assert symtab is not None
 -        self.symtab = symtab
 -
 -        shstr = self.sections[self.elf_header.e_shstrndx]
 -        for s in self.sections:
 -            s.name = shstr.lookup_str(s.sh_name)
 -            s.late_init(self.sections)
 -
 -    def find_section(self, name, num):
 -        i = 0 # Count how many sections of name `name` have been encountered so far, when i reaches `num` return that section
 -        for s in self.sections:
 -            if s.name == name and i == num:
 -                return s
 -            # Increment if section is a .text section
 -            if s.name == ".text":
 -                i += 1 
 -        return None
 -
 -    # Because Metrowerks for DS can make duplicate .text sections
 -    # for every function, we may need to lookup a specific .text area.
 -    def find_section_with_name(self, name, st_name):
 -        for s in self.sections:
 -            if s.name == name and s.sh_name == st_name:
 -                return s
 -        return None
 -
 -    # Return i, where i is the ith text section corresponding to the function 
 -    # called `name`.
 -    def text_section_index(self, name):
 -        st_shndx, _ = self.symtab.find_symbol(name)
 -        n_text = 0
 -        for sec in self.sections:
 -            if sec.index == st_shndx:
 -                return n_text
 -            if sec.name =='.text':
 -                n_text += 1      
 -        return -1
 -
 -    def add_section(self, name, sh_type, sh_flags, sh_link, sh_info, sh_addralign, sh_entsize, data):
 -        shstr = self.sections[self.elf_header.e_shstrndx]
 -        sh_name = shstr.add_str(name)
 -        s = Section.from_parts(sh_name=sh_name, sh_type=sh_type,
 -                sh_flags=sh_flags, sh_link=sh_link, sh_info=sh_info,
 -                sh_addralign=sh_addralign, sh_entsize=sh_entsize, data=data,
 -                index=len(self.sections))
 -        self.sections.append(s)
 -        s.name = name
 -        s.late_init(self.sections)
 -        return s
 -
 -    def drop_irrelevant_sections(self):
 -        # We can only drop sections at the end, since otherwise section
 -        # references might be wrong. Luckily, these sections typically are.
 -        while self.sections[-1].sh_type in [SHT_MIPS_DEBUG, SHT_MIPS_GPTAB]:
 -            self.sections.pop()
 -
 -    def write(self, filename):
 -        outfile = open(filename, 'wb')
 -        outidx = 0
 -        def write_out(data):
 -            nonlocal outidx
 -            outfile.write(data)
 -            outidx += len(data)
 -        def pad_out(align):
 -            if align and outidx % align:
 -                write_out(b'\0' * (align - outidx % align))
 -
 -        self.elf_header.e_shnum = len(self.sections)
 -        write_out(self.elf_header.to_bin())
 -
 -        for s in self.sections:
 -            if s.sh_type != SHT_NOBITS and s.sh_type != SHT_NULL:
 -                pad_out(s.sh_addralign)
 -                s.sh_offset = outidx
 -                write_out(s.data)
 -
 -        pad_out(4)
 -        self.elf_header.e_shoff = outidx
 -        for s in self.sections:
 -            write_out(s.header_to_bin())
 -
 -        outfile.seek(0)
 -        outfile.write(self.elf_header.to_bin())
 -        outfile.close()
 -
 -
 -def is_temp_name(name):
 -    return name.startswith('_asmpp_')
 -
 -
 -# https://stackoverflow.com/a/241506
 -def re_comment_replacer(match):
 -    s = match.group(0)
 -    if s[0] in "/#":
 -        return " "
 -    else:
 -        return s
 -
 -
 -re_comment_or_string = re.compile(
 -    r'#.*|/\*.*?\*/|"(?:\\.|[^\\"])*"'
 -)
 -
 -
 -class Failure(Exception):
 -    def __init__(self, message):
 -        self.message = message
 -
 -    def __str__(self):
 -        return self.message
 -
 -
 -class GlobalState:
 -    def __init__(self, min_instr_count, skip_instr_count, use_jtbl_for_rodata):
 -        # A value that hopefully never appears as a 32-bit rodata constant (or we
 -        # miscompile late rodata). Increases by 1 in each step.
 -        self.late_rodata_hex = 0xE0123456
 -        self.namectr = 0
 -        self.min_instr_count = min_instr_count
 -        self.skip_instr_count = skip_instr_count
 -        self.use_jtbl_for_rodata = use_jtbl_for_rodata
 -
 -    def next_late_rodata_hex(self):
 -        dummy_bytes = struct.pack('<I', self.late_rodata_hex)
 -        if (self.late_rodata_hex & 0xffff) == 0:
 -            # Avoid lui
 -            self.late_rodata_hex += 1
 -        self.late_rodata_hex += 1
 -        return dummy_bytes
 -
 -    def make_name(self, cat):
 -        self.namectr += 1
 -        return '_asmpp_{}{}'.format(cat, self.namectr)
 -
 -
 -Function = namedtuple('Function', ['text_glabels', 'asm_conts', 'late_rodata_dummy_bytes', 'jtbl_rodata_size', 'late_rodata_asm_conts', 'fn_desc', 'data'])
 -
 -
 -class GlobalAsmBlock:
 -    def __init__(self, fn_desc):
 -        self.fn_desc = fn_desc
 -        self.cur_section = '.text'
 -        self.asm_conts = []
 -        self.late_rodata_asm_conts = []
 -        self.late_rodata_alignment = 0
 -        self.late_rodata_alignment_from_content = False
 -        self.text_glabels = []
 -        self.fn_section_sizes = {
 -            '.text': 0,
 -            '.init': 0,
 -            '.data': 0,
 -            '.bss': 0,
 -            '.rodata': 0,
 -            '.sdata': 0,
 -            '.sdata2': 0,
 -            '.sbss': 0,
 -            #'.sbss2': 0,
 -            '.late_rodata': 0,
 -        }
 -        self.fn_ins_inds = []
 -        self.glued_line = ''
 -        self.num_lines = 0
 -
 -    def fail(self, message, line=None):
 -        context = self.fn_desc
 -        if line:
 -            context += ", at line \"" + line + "\""
 -        raise Failure(message + "\nwithin " + context)
 -
 -    def count_quoted_size(self, line, z, real_line, output_enc):
 -        line = line.encode(output_enc).decode('latin1')
 -        in_quote = False
 -        num_parts = 0
 -        ret = 0
 -        i = 0
 -        digits = "0123456789" # 0-7 would be more sane, but this matches GNU as
 -        while i < len(line):
 -            c = line[i]
 -            i += 1
 -            if not in_quote:
 -                if c == '"':
 -                    in_quote = True
 -                    num_parts += 1
 -            else:
 -                if c == '"':
 -                    in_quote = False
 -                    continue
 -                ret += 1
 -                if c != '\\':
 -                    continue
 -                if i == len(line):
 -                    self.fail("backslash at end of line not supported", real_line)
 -                c = line[i]
 -                i += 1
 -                # (if c is in "bfnrtv", we have a real escaped literal)
 -                if c == 'x':
 -                    # hex literal, consume any number of hex chars, possibly none
 -                    while i < len(line) and line[i] in digits + "abcdefABCDEF":
 -                        i += 1
 -                elif c in digits:
 -                    # octal literal, consume up to two more digits
 -                    it = 0
 -                    while i < len(line) and line[i] in digits and it < 2:
 -                        i += 1
 -                        it += 1
 -
 -        if in_quote:
 -            self.fail("unterminated string literal", real_line)
 -        if num_parts == 0:
 -            self.fail(".ascii with no string", real_line)
 -        return ret + num_parts if z else ret
 -
 -
 -    def align4(self):
 -        while self.fn_section_sizes[self.cur_section] % 2 != 0:
 -            self.fn_section_sizes[self.cur_section] += 1
 -
 -    def add_sized(self, size, line):
 -        if self.cur_section in ['.text', '.init', '.late_rodata']:
 -            if size % 2 != 0:
 -                self.fail("size must be a multiple of 2 or 4", line)
 -        if size < 0:
 -            self.fail("size cannot be negative", line)
 -        self.fn_section_sizes[self.cur_section] += size
 -        if self.cur_section in ['.text', '.init']:
 -            if not self.text_glabels:
 -                self.fail(".text or .init block without an initial glabel", line)
 -            self.fn_ins_inds.append((self.num_lines - 1, size // 2))
 -
 -    def process_line(self, line, output_enc):
 -        self.num_lines += 1
 -        if line.endswith('\\'):
 -            self.glued_line += line[:-1]
 -            return
 -        line = self.glued_line + line
 -        self.glued_line = ''
 -
 -        real_line = line
 -        line = re.sub(re_comment_or_string, re_comment_replacer, line)
 -        line = line.strip()
 -        line = re.sub(r'^[a-zA-Z0-9_]+:\s*', '', line)
 -        changed_section = False
 -        emitting_double = False
 -        if line.startswith('glabel ') and self.cur_section in ['.text', '.init']:
 -            self.text_glabels.append(line.split()[1])
 -        if not line:
 -            pass # empty line
 -        elif line.startswith('glabel ') or (' ' not in line and line.endswith(':')):
 -            pass # label
 -        elif line.startswith('.section') or line in ['.text', '.init', '.data', '.rdata', '.rodata', '.sdata', '.sdata2', '.bss','.sbss', '.late_rodata']:
 -            # section change
 -            self.cur_section = '.rodata' if line == '.rdata' else line.split(',')[0].split()[-1]
 -            if self.cur_section not in ['.data', '.text', '.init', '.rodata', '.sdata', '.sdata2', '.late_rodata', '.bss', '.sbss']:
 -                self.fail("unrecognized .section directive", real_line)
 -            changed_section = True
 -        elif line.startswith('.late_rodata_alignment'):
 -            if self.cur_section != '.late_rodata':
 -                self.fail(".late_rodata_alignment must occur within .late_rodata section", real_line)
 -            value = int(line.split()[1])
 -            if value not in [4, 8]:
 -                self.fail(".late_rodata_alignment argument must be 4 or 8", real_line)
 -            if self.late_rodata_alignment and self.late_rodata_alignment != value:
 -                self.fail(".late_rodata_alignment alignment assumption conflicts with earlier .double directive. Make sure to provide explicit alignment padding.")
 -            self.late_rodata_alignment = value
 -            changed_section = True
 -        elif line.startswith('.incbin'):
 -            self.add_sized(int(line.split(',')[-1].strip(), 0), real_line)
 -        elif line.startswith('.skip'):
 -            self.add_sized(int(line.split()[-1].strip(), 0), real_line)
 -        elif line.startswith('.long') or line.startswith('.float'):
 -            self.align4()
 -            self.add_sized(4 * len(line.split(',')), real_line)
 -        elif line.startswith('.double'):
 -            self.align4()
 -            if self.cur_section == '.late_rodata':
 -                align8 = self.fn_section_sizes[self.cur_section] % 8
 -                # Automatically set late_rodata_alignment, so the generated C code uses doubles.
 -                # This gives us correct alignment for the transferred doubles even when the
 -                # late_rodata_alignment is wrong, e.g. for non-matching compilation.
 -                if not self.late_rodata_alignment:
 -                    self.late_rodata_alignment = 8 - align8
 -                    self.late_rodata_alignment_from_content = True
 -                elif self.late_rodata_alignment != 8 - align8:
 -                    if self.late_rodata_alignment_from_content:
 -                        self.fail("found two .double directives with different start addresses mod 8. Make sure to provide explicit alignment padding.", real_line)
 -                    else:
 -                        self.fail(".double at address that is not 0 mod 8 (based on .late_rodata_alignment assumption). Make sure to provide explicit alignment padding.", real_line)
 -            self.add_sized(8 * len(line.split(',')), real_line)
 -            emitting_double = True
 -        elif line.startswith('.space'):
 -            self.add_sized(int(line.split()[1], 0), real_line)
 -        elif line.startswith('.balign') or line.startswith('.align'):
 -            align = int(line.split()[1])
 -            if align != 2:
 -                self.fail("only .balign 4 is supported", real_line)
 -            self.align4()
 -        elif line.startswith('.asci'):
 -            z = (line.startswith('.asciz') or line.startswith('.asciiz'))
 -            self.add_sized(self.count_quoted_size(line, z, real_line, output_enc), real_line)
 -        elif line.startswith('.byte'):
 -            self.add_sized(len(line.split(',')), real_line)
 -        # Branches are 4 bytes long
 -        elif line.startswith('bl'):
 -            self.add_sized(4, real_line)
 -        elif line.startswith('.'):
 -            # .macro, ...
 -            self.fail("asm directive not supported", real_line)
 -        else:
 -            # Unfortunately, macros are hard to support for .rodata --
 -            # we don't know how how space they will expand to before
 -            # running the assembler, but we need that information to
 -            # construct the C code. So if we need that we'll either
 -            # need to run the assembler twice (at least in some rare
 -            # cases), or change how this program is invoked.
 -            # Similarly, we can't currently deal with pseudo-instructions
 -            # that expand to several real instructions.
 -            if self.cur_section != '.text' and self.cur_section != '.init':
 -                self.fail("instruction or macro call in non-.text/.init section? not supported", real_line)
 -            self.add_sized(2, real_line)
 -        if self.cur_section == '.late_rodata':
 -            if not changed_section:
 -                if emitting_double:
 -                    self.late_rodata_asm_conts.append(".align 0")
 -                self.late_rodata_asm_conts.append(real_line)
 -                if emitting_double:
 -                    self.late_rodata_asm_conts.append(".align 2")
 -        else:
 -            self.asm_conts.append(real_line)
 -
 -    def finish(self, state):
 -        src = [''] * (self.num_lines + 1)
 -        late_rodata_dummy_bytes = []
 -        jtbl_rodata_size = 0
 -        late_rodata_fn_output = []
 -
 -        num_instr = self.fn_section_sizes['.text'] // 2
 -
 -        if self.fn_section_sizes['.late_rodata'] > 0:
 -            # Generate late rodata by emitting unique float constants.
 -            # This requires 3 instructions for each 4 bytes of rodata.
 -            # If we know alignment, we can use doubles, which give 3
 -            # instructions for 8 bytes of rodata.
 -            size = self.fn_section_sizes['.late_rodata'] // 2
 -            skip_next = False
 -            needs_double = (self.late_rodata_alignment != 0)
 -            for i in range(size):
 -                if skip_next:
 -                    skip_next = False
 -                    continue
 -                # Jump tables give 9 instructions for >= 5 words of rodata, and should be
 -                # emitted when:
 -                # - -O2 or -O2 -g3 are used, which give the right codegen
 -                # - we have emitted our first .float/.double (to ensure that we find the
 -                #   created rodata in the binary)
 -                # - we have emitted our first .double, if any (to ensure alignment of doubles
 -                #   in shifted rodata sections)
 -                # - we have at least 5 words of rodata left to emit (otherwise IDO does not
 -                #   generate a jump table)
 -                # - we have at least 10 more instructions to go in this function (otherwise our
 -                #   function size computation will be wrong since the delay slot goes unused)
 -                if (not needs_double and state.use_jtbl_for_rodata and i >= 1 and
 -                        size - i >= 5 and num_instr - len(late_rodata_fn_output) >= 10):
 -                    cases = " ".join("case {}:".format(case) for case in range(size - i))
 -                    late_rodata_fn_output.append("switch (*(volatile int*)0) { " + cases + " ; }")
 -                    late_rodata_fn_output.extend([""] * 8)
 -                    jtbl_rodata_size = (size - i) * 4
 -                    break
 -                dummy_bytes = state.next_late_rodata_hex()
 -                late_rodata_dummy_bytes.append(dummy_bytes)
 -                if self.late_rodata_alignment == 4 * ((i + 1) % 2 + 1) and i + 1 < size:
 -                    dummy_bytes2 = state.next_late_rodata_hex()
 -                    late_rodata_dummy_bytes.append(dummy_bytes2)
 -                    fval, = struct.unpack('<d', dummy_bytes + dummy_bytes2)
 -                    late_rodata_fn_output.append('*(volatile double*)0 = {};'.format(fval))
 -                    skip_next = True
 -                    needs_double = True
 -                else:
 -                    fval, = struct.unpack('<f', dummy_bytes)
 -                    late_rodata_fn_output.append('*(volatile float*)0 = {}f;'.format(fval))
 -                late_rodata_fn_output.append('')
 -                late_rodata_fn_output.append('')
 -
 -        text_name = None
 -        if self.fn_section_sizes['.text'] > 0 or late_rodata_fn_output:
 -            text_name = state.make_name('func')
 -            src[0] = 'int {}(void) {{ return '.format(text_name)
 -            instr_count = self.fn_section_sizes['.text'] // 2
 -            src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count > 1 else '; }; '
 -            if instr_count < state.min_instr_count:
 -                self.fail("too short .text block")
 -            tot_emitted = 0
 -            tot_skipped = 0
 -            fn_emitted = 0
 -            fn_skipped = 0
 -            rodata_stack = late_rodata_fn_output[::-1]
 -            for (line, count) in self.fn_ins_inds:
 -                for _ in range(count):
 -                    if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and
 -                            (not rodata_stack or rodata_stack[-1])):
 -                        # Don't let functions become too large. When a function reaches 284
 -                        # instructions, and -O2 -framepointer flags are passed, the IRIX
 -                        # compiler decides it is a great idea to start optimizing more.
 -                        fn_emitted = 0
 -                        fn_skipped = 0
 -                        src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func'))
 -                    if fn_skipped < state.skip_instr_count:
 -                        fn_skipped += 1
 -                        tot_skipped += 1
 -                    elif rodata_stack:
 -                        src[line] += rodata_stack.pop()
 -                    else:
 -                        src[line] += '*(int *)'
 -                    tot_emitted += 1
 -                    fn_emitted += 1
 -            if rodata_stack:
 -                size = len(late_rodata_fn_output) // 3
 -                available = instr_count - tot_skipped
 -                self.fail(
 -                    "late rodata to text ratio is too high: {} / {} must be <= 1/3\n"
 -                    "add .late_rodata_alignment (4|8) to the .late_rodata "
 -                    "block to double the allowed ratio."
 -                        .format(size, available))
 -
 -        init_name = None
 -        if self.fn_section_sizes['.init'] > 0 or late_rodata_fn_output:
 -            init_name = state.make_name('func')
 -            src[0] = 'int {}(void) {{ return '.format(init_name)
 -            instr_count = self.fn_section_sizes['.init'] // 2
 -            src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count else '; }; '
 -            if instr_count < state.min_instr_count:
 -                self.fail("too short .init block")
 -            tot_emitted = 0
 -            tot_skipped = 0
 -            fn_emitted = 0
 -            fn_skipped = 0
 -            rodata_stack = late_rodata_fn_output[::-1]
 -            for (line, count) in self.fn_ins_inds:
 -                for _ in range(count):
 -                    if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and
 -                            (not rodata_stack or rodata_stack[-1])):
 -                        # Don't let functions become too large. When a function reaches 284
 -                        # instructions, and -O2 -framepointer flags are passed, the IRIX
 -                        # compiler decides it is a great idea to start optimizing more.
 -                        fn_emitted = 0
 -                        fn_skipped = 0
 -                        src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func'))
 -                    if fn_skipped < state.skip_instr_count:
 -                        fn_skipped += 1
 -                        tot_skipped += 1
 -                    elif rodata_stack:
 -                        src[line] += rodata_stack.pop()
 -                    else:
 -                        src[line] += '*(int *)'
 -                    tot_emitted += 1
 -                    fn_emitted += 1
 -            if rodata_stack:
 -                size = len(late_rodata_fn_output) // 3
 -                available = instr_count - tot_skipped
 -                self.fail(
 -                    "late rodata to init ratio is too high: {} / {} must be <= 1/3\n"
 -                    "add .late_rodata_alignment (4|8) to the .late_rodata "
 -                    "block to double the allowed ratio."
 -                        .format(size, available))
 -
 -        rodata_name = None
 -        if self.fn_section_sizes['.rodata'] > 0:
 -            rodata_name = state.make_name('rodata')
 -            src[self.num_lines] += f" const char {rodata_name}[{self.fn_section_sizes['.rodata']}] = {{1}};"
 -
 -        data_name = None
 -        if self.fn_section_sizes['.data'] > 0:
 -            data_name = state.make_name('data')
 -            src[self.num_lines] += f" char {data_name}[{self.fn_section_sizes['.data']}] = {{1}};"
 -
 -        bss_name = None
 -        if self.fn_section_sizes['.bss'] > 0:
 -            bss_name = state.make_name('bss')
 -            src[self.num_lines] += f" char {bss_name}[{self.fn_section_sizes['.bss']}];"
 -
 -        sdata_name = None # sdata is like data but small
 -        if self.fn_section_sizes['.sdata'] > 0:
 -            sdata_code = ""
 -            for i in range(self.fn_section_sizes['.sdata']):
 -                sdata_name = state.make_name('sdata')
 -                sdata_code += f" char {sdata_name} = 1;"
 -            src[self.num_lines] += sdata_code
 -
 -        sdata2_name = None # sdata2 is like rodata but small
 -        if self.fn_section_sizes['.sdata2'] > 0:
 -            sdata2_code = ""
 -            for i in range(self.fn_section_sizes['.sdata2']):
 -                sdata2_name = state.make_name('sdata2')
 -                sdata2_code += f" const char {sdata2_name} = 1;"
 -            src[self.num_lines] += sdata2_code
 -
 -        sbss_name = None # Similarly, sbss is like uninitialized data but small
 -        if self.fn_section_sizes['.sbss'] > 0:
 -            sbss_code = ""
 -            for i in range(self.fn_section_sizes['.sbss']):
 -                sbss_name = state.make_name('sbss')
 -                sbss_code += f" char {sbss_name};"
 -            src[self.num_lines] += sbss_code
 -
 -        """ sbss2 is currently borked
 -        sbss2_name = None # Similarly, sbss2 is like uninitialized rodata but small
 -        if self.fn_section_sizes['.sbss2'] > 0:
 -            sbss2_code = ""
 -            for i in range(self.fn_section_sizes['.sbss2']):
 -                sbss2_name = state.make_name('sbss2')
 -                sbss2_code += f" const char {sbss2_name};"
 -            src[self.num_lines] += sbss2_code
 -        """
 -
 -        fn = Function(
 -                text_glabels=self.text_glabels,
 -                asm_conts=self.asm_conts,
 -                late_rodata_dummy_bytes=late_rodata_dummy_bytes,
 -                jtbl_rodata_size=jtbl_rodata_size,
 -                late_rodata_asm_conts=self.late_rodata_asm_conts,
 -                fn_desc=self.fn_desc,
 -                data={
 -                    '.text': (text_name, self.fn_section_sizes['.text']),
 -                    '.data': (data_name, self.fn_section_sizes['.data']),
 -                    '.rodata': (rodata_name, self.fn_section_sizes['.rodata']),
 -                    '.bss': (bss_name, self.fn_section_sizes['.bss']),
 -                    '.sdata': (sdata_name, self.fn_section_sizes['.sdata']),
 -                    '.sdata2': (sdata2_name, self.fn_section_sizes['.sdata2']),
 -                    '.sbss': (sbss_name, self.fn_section_sizes['.sbss']),
 -                    #'.sbss2': (sbss2_name, self.fn_section_sizes['.sbss2']),
 -                })
 -        return src, fn
 -
 -cutscene_data_regexpr = re.compile(r"CutsceneData (.|\n)*\[\] = {")
 -float_regexpr = re.compile(r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f")
 -
 -def repl_float_hex(m):
 -    return str(struct.unpack("<I", struct.pack("<f", float(m.group(0).strip().rstrip("f"))))[0])
 -
 -def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None):
 -    opt = "O4"
 -    min_instr_count = 3 # idk
 -    skip_instr_count = 2 # idk
 -
 -    use_jtbl_for_rodata = False
 -    if opt in ['O2', 'g3'] and not framepointer:
 -        use_jtbl_for_rodata = True
 -
 -    state = GlobalState(min_instr_count, skip_instr_count, use_jtbl_for_rodata)
 -
 -    global_asm = None
 -    asm_functions = []
 -    output_lines = []
 -
 -    is_cutscene_data = False
 -
 -    for line_no, raw_line in enumerate(f, 1):
 -        raw_line = raw_line.rstrip()
 -        line = raw_line.lstrip()
 -
 -        # Print exactly one output line per source line, to make compiler
 -        # errors have correct line numbers. These will be overridden with
 -        # reasonable content further down.
 -        output_lines.append('')
 -        if global_asm is not None:
 -            if line.startswith(')'):
 -                src, fn = global_asm.finish(state)
 -                for i, line2 in enumerate(src):
 -                    output_lines[start_index + i] = line2
 -                asm_functions.append(fn)
 -                global_asm = None
 -            else:
 -                global_asm.process_line(raw_line, output_enc)
 -        else:
 -            if line in ['GLOBAL_ASM(', '#pragma GLOBAL_ASM(']:
 -                global_asm = GlobalAsmBlock("GLOBAL_ASM block at line " + str(line_no))
 -                start_index = len(output_lines)
 -            elif ((line.startswith('GLOBAL_ASM("') or line.startswith('#pragma GLOBAL_ASM("'))
 -                    and line.endswith('")')):
 -                fname = line[line.index('(') + 2 : -2]
 -                global_asm = GlobalAsmBlock(fname)
 -                with open(fname, encoding=input_enc) as f:
 -                    for line2 in f:
 -                        global_asm.process_line(line2.rstrip(), output_enc)
 -                src, fn = global_asm.finish(state)
 -                output_lines[-1] = ''.join(src)
 -                asm_functions.append(fn)
 -                global_asm = None
 -            elif ((line.startswith('#include "')) and line.endswith('" EARLY')):
 -                # C includes qualified with EARLY (i.e. #include "file.c" EARLY) will be
 -                # processed recursively when encountered
 -                fpath = os.path.dirname(f.name)
 -                fname = line[line.index(' ') + 2 : -7]
 -                include_src = StringIO()
 -                with open(fpath + os.path.sep + fname, encoding=input_enc) as include_file:
 -                    parse_source(include_file, opt, framepointer, input_enc, output_enc, include_src)
 -                output_lines[-1] = include_src.getvalue()
 -                include_src.write('#line ' + str(line_no) + '\n')
 -                include_src.close()
 -            else:
 -                # This is a hack to replace all floating-point numbers in an array of a particular type
 -                # (in this case CutsceneData) with their corresponding IEEE-754 hexadecimal representation
 -                if cutscene_data_regexpr.search(line) is not None:
 -                    is_cutscene_data = True
 -                elif line.endswith("};"):
 -                    is_cutscene_data = False
 -                if is_cutscene_data:
 -                    raw_line = re.sub(float_regexpr, repl_float_hex, raw_line)
 -                output_lines[-1] = raw_line
 -
 -    if print_source:
 -        if isinstance(print_source, StringIO):
 -            for line in output_lines:
 -                print_source.write(line + '\n')
 -        else:
 -            for line in output_lines:
 -                print_source.write(line.encode(output_enc) + b'\n')
 -            print_source.flush()
 -            if print_source != sys.stdout.buffer:
 -                print_source.close()
 -    out_file = open("output.txt", 'w')
 -    out_file.write(str(asm_functions))
 -    out_file.close()
 -    return asm_functions
 -
 -def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
 -    SECTIONS = ['.data']
 -    SECTIONS.extend(['.text' for i in range(0,len(functions))])
 -    SECTIONS.extend(['.rodata', '.bss', '.sdata', '.sdata2', '.sbss'])
 -
 -    with open(objfile_name, 'rb') as f:
 -        objfile = ElfFile(f.read())
 -
 -    prev_locs = defaultdict(int)
 -    to_copy = defaultdict(list) 
 -
 -    asm = []
 -    all_late_rodata_dummy_bytes = []
 -    all_jtbl_rodata_size = []
 -    late_rodata_asm = []
 -    late_rodata_source_name_start = None
 -    late_rodata_source_name_end = None
 -
 -    # Generate an assembly file with all the assembly we need to fill in. For
 -    # simplicity we pad with nops/.space so that addresses match exactly, so we
 -    # don't have to fix up relocations/symbol references.
 -    all_text_glabels = set()
 -    for function in functions:
 -        ifdefed = False
 -        for sectype, (temp_name, size) in function.data.items():
 -            if temp_name is None:
 -                continue
 -            assert size > 0
 -            n_text = objfile.text_section_index(temp_name)
 -            loc = objfile.symtab.find_symbol(temp_name)
 -            if loc is None:
 -                ifdefed = True
 -                break
 -            loc = loc[1]
 -            prev_loc = prev_locs[sectype + (str(n_text) if sectype == '.text' else '')]
 -            if loc < prev_loc:
 -                raise Failure("Wrongly computed size for section {} (diff {}). This is an asm-processor bug!".format(sectype + (str(n_text) if sectype == '.text' else ''), prev_loc- loc))
 -            if loc != prev_loc:
 -                asm.append('.section ' + sectype)
 -                if sectype == '.text':
 -                    for i in range((loc - prev_loc) // 2):
 -                        asm.append('nop')
 -                else:
 -                    asm.append('.space {}'.format(loc - prev_loc))
 -            to_copy[sectype + (str(n_text) if sectype == '.text' else '')].append((loc, size, temp_name, function.fn_desc))
 -            prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] = loc + size
 -        if not ifdefed:
 -            all_text_glabels.update(function.text_glabels)
 -            all_late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes)
 -            all_jtbl_rodata_size.append(function.jtbl_rodata_size)
 -            late_rodata_asm.append(function.late_rodata_asm_conts)
 -            for sectype, (temp_name, size) in function.data.items():
 -                if temp_name is not None:
 -                    asm.append('.section ' + sectype)
 -                    asm.append('glabel ' + temp_name + '_asm_start')
 -            asm.append('.section .text')
 -            for line in function.asm_conts:
 -                asm.append(line)
 -            for sectype, (temp_name, size) in function.data.items():
 -                if temp_name is not None:
 -                    #asm.append('.section ' + sectype)
 -                    asm.append('glabel ' + temp_name + '_asm_end')
 -
 -    if any(late_rodata_asm):
 -        late_rodata_source_name_start = '_asmpp_late_rodata_start'
 -        late_rodata_source_name_end = '_asmpp_late_rodata_end'
 -        asm.append('.rdata')
 -        asm.append('glabel {}'.format(late_rodata_source_name_start))
 -        for conts in late_rodata_asm:
 -            asm.extend(conts)
 -        asm.append('glabel {}'.format(late_rodata_source_name_end))
 -
 -    o_file = open("asm_processor_temp.o", 'w').close() # Create temp file. tempfile module isn't working for me.
 -    o_name = "asm_processor_temp.o"
 -
 -    s_file = open("asm_processor_temp.s", 'wb') # Ditto.
 -    s_name = "asm_processor_temp.s"
 -    try:
 -        s_file.write(asm_prelude + b'\n')
 -        for line in asm:
 -            s_file.write(line.encode(output_enc) + b'\n')
 -        s_file.close()
 -        ret = os.system(assembler + " " + s_name + " -o " + o_name)
 -        if ret != 0:
 -            raise Failure("failed to assemble")
 -        with open(o_name, 'rb') as f:
 -            asm_objfile = ElfFile(f.read())
 -
 -        # Remove some clutter from objdump output
 -        objfile.drop_irrelevant_sections()
 -
 -        """
 -        # Unify reginfo sections
 -        target_reginfo = objfile.find_section('.reginfo')
 -        source_reginfo_data = list(asm_objfile.find_section('.reginfo').data)
 -        data = list(target_reginfo.data)
 -        for i in range(20):
 -            data[i] |= source_reginfo_data[i]
 -        target_reginfo.data = bytes(data)
 -        """
 -
 -        # Move over section contents
 -        modified_text_positions = set()
 -        jtbl_rodata_positions = set()
 -        last_rodata_pos = 0
 -        n_text = 0
 -        for sec in objfile.sections:
 -            sectype = sec.name
 -            if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]:
 -                if sectype == '.text':
 -                    n_text += 1
 -                continue
 -            # This should work as long as you NONMATCH whole functions rather than asm fragments
 -            func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2]
 -            asm_n_text = asm_objfile.text_section_index(func + '_asm_start')
 -            source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0)
 -            assert source is not None, "didn't find source section: " + sectype
 -            for (pos, count, temp_name, fn_desc) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]:
 -                loc1 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_start', source)
 -                loc2 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_end', source)
 -                assert loc1 == pos, "assembly and C files don't line up for section " + sectype + ", " + fn_desc
 -                if loc2 - loc1 != count:
 -                    raise Failure("incorrectly computed size for section " + sectype + ", " + fn_desc + ". If using .double, make sure to provide explicit alignment padding.")
 -            if sectype == '.bss' or sectype == '.sbss2':
 -                continue
 -            target = objfile.find_section(sectype, n_text if sectype == '.text' else 0)
 -            assert target is not None, "missing target section of type " + sectype
 -            data = list(target.data)
 -            for (pos, count, _, _) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]:
 -                # mwasmarm 4-aligns text sections, so make sure to copy exactly `count` bytes
 -                data[pos:pos + count] = source.data[pos:pos + count]
 -                if sectype == '.text':
 -                    assert count % 2 == 0
 -                    assert pos % 2 == 0
 -                    for i in range(count // 2):
 -                        modified_text_positions.add(pos + 2 * i)
 -                elif sectype == '.rodata':
 -                    last_rodata_pos = pos + count
 -            target.data = bytes(data)
 -            if sectype == '.text':
 -                n_text += 1
 -
 -        # Move over late rodata. This is heuristic, sadly, since I can't think
 -        # of another way of doing it.
 -        moved_late_rodata = {}
 -        if any(all_late_rodata_dummy_bytes) or any(all_jtbl_rodata_size):
 -            source = asm_objfile.find_section('.rodata', 0)
 -            target = objfile.find_section('.rodata', 0)
 -            source_pos = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_start, source)
 -            source_end = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_end, source)
 -            if source_end - source_pos != sum(map(len, all_late_rodata_dummy_bytes)) * 2 + sum(all_jtbl_rodata_size):
 -                raise Failure("computed wrong size of .late_rodata")
 -            new_data = list(target.data)
 -            for dummy_bytes_list, jtbl_rodata_size in zip(all_late_rodata_dummy_bytes, all_jtbl_rodata_size):
 -                for index, dummy_bytes in enumerate(dummy_bytes_list):
 -                    pos = target.data.index(dummy_bytes, last_rodata_pos)
 -                    # This check is nice, but makes time complexity worse for large files:
 -                    if SLOW_CHECKS and target.data.find(dummy_bytes, pos + 2) != -1:
 -                        raise Failure("multiple occurrences of late_rodata hex magic. Change asm-processor to use something better than 0xE0123456!")
 -                    if index == 0 and len(dummy_bytes_list) > 1 and target.data[pos+2:pos+8] == b'\0\0\0\0':
 -                        # Ugly hack to handle double alignment for non-matching builds.
 -                        # We were told by .late_rodata_alignment (or deduced from a .double)
 -                        # that a function's late_rodata started out 4 (mod 8), and emitted
 -                        # a float and then a double. But it was actually 0 (mod 8), so our
 -                        # double was moved by 4 bytes. To make them adjacent to keep jump
 -                        # tables correct, move the float by 4 bytes as well.
 -                        new_data[pos:pos+2] = b'\0\0\0\0'
 -                        pos += 2
 -                    new_data[pos:pos+2] = source.data[source_pos:source_pos+2]
 -                    moved_late_rodata[source_pos] = pos
 -                    last_rodata_pos = pos + 2
 -                    source_pos += 2
 -                if jtbl_rodata_size > 0:
 -                    assert dummy_bytes_list, "should always have dummy bytes before jtbl data"
 -                    pos = last_rodata_pos
 -                    new_data[pos : pos + jtbl_rodata_size] = \
 -                        source.data[source_pos : source_pos + jtbl_rodata_size]
 -                    for i in range(0, jtbl_rodata_size, 2):
 -                        moved_late_rodata[source_pos + i] = pos + i
 -                        jtbl_rodata_positions.add(pos + i)
 -                    last_rodata_pos += jtbl_rodata_size
 -                    source_pos += jtbl_rodata_size
 -            target.data = bytes(new_data)
 -
 -        # Merge strtab data.
 -        strtab_adj = len(objfile.symtab.strtab.data)
 -        objfile.symtab.strtab.data += asm_objfile.symtab.strtab.data
 -
 -        # Find relocated symbols
 -        relocated_symbols = set()
 -        for obj in [asm_objfile, objfile]:
 -            for sec in obj.sections:
 -                for reltab in sec.relocated_by:
 -                    for rel in reltab.relocations:
 -                        relocated_symbols.add(obj.symtab.symbol_entries[rel.sym_index])
 -
 -        # Move over symbols, deleting the temporary function labels.
 -        # Sometimes this naive procedure results in duplicate symbols, or UNDEF
 -        # symbols that are also defined the same .o file. Hopefully that's fine.
 -        # Skip over local symbols that aren't used relocated against, to avoid
 -        # conflicts.
 -        new_local_syms = [s for s in objfile.symtab.local_symbols() if not is_temp_name(s.name)]
 -        new_global_syms = [s for s in objfile.symtab.global_symbols() if not is_temp_name(s.name)]
 -        n_text = 0
 -        for i, s in enumerate(asm_objfile.symtab.symbol_entries):
 -            is_local = (i < asm_objfile.symtab.sh_info)
 -            if is_local and s not in relocated_symbols:
 -                continue
 -            if is_temp_name(s.name):
 -                continue
 -            if s.st_shndx not in [SHN_UNDEF, SHN_ABS]:
 -                section_name = asm_objfile.sections[s.st_shndx].name
 -                if section_name not in SECTIONS:
 -                    raise Failure("generated assembly .o must only have symbols for .text, .data, .rodata, .sdata, .sdata2, .sbss, ABS and UNDEF, but found " + section_name)
 -                if section_name == '.sbss2': #! I'm not sure why this isn't working
 -                    continue
 -                s.st_shndx = objfile.find_section(section_name, n_text if section_name == '.text' else 0).index
 -                if section_name == '.text':
 -                    n_text += 1
 -                # glabel's aren't marked as functions, making objdump output confusing. Fix that.
 -                if s.name in all_text_glabels:
 -                    s.type = STT_FUNC
 -                if objfile.sections[s.st_shndx].name == '.rodata' and s.st_value in moved_late_rodata:
 -                    s.st_value = moved_late_rodata[s.st_value]
 -            s.st_name += strtab_adj
 -            if is_local:
 -                new_local_syms.append(s)
 -            else:
 -                new_global_syms.append(s)
 -        new_syms = new_local_syms + new_global_syms
 -        for i, s in enumerate(new_syms):
 -            s.new_index = i
 -        objfile.symtab.data = b''.join(s.to_bin() for s in new_syms)
 -        objfile.symtab.sh_info = len(new_local_syms)
 -
 -        # Move over relocations
 -        n_text = 0
 -        for sec in objfile.sections:
 -            sectype = sec.name
 -            # This should work as long as you NONMATCH whole functions rather than asm fragments
 -            target = objfile.find_section(sectype, n_text if sectype == '.text' else 0)
 -
 -            if target is not None:
 -                # fixup relocation symbol indices, since we butchered them above
 -                for reltab in target.relocated_by:
 -                    nrels = []
 -                    for rel in reltab.relocations:
 -                        if (sectype == '.text' and rel.r_offset in modified_text_positions or
 -                            sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions) or sectype == ".sbss2":
 -                            # don't include relocations for late_rodata dummy code
 -                            continue
 -                        # hopefully we don't have relocations for local or
 -                        # temporary symbols, so new_index exists
 -                        rel.sym_index = objfile.symtab.symbol_entries[rel.sym_index].new_index
 -                        nrels.append(rel)
 -                    reltab.relocations = nrels
 -                    reltab.data = b''.join(rel.to_bin() for rel in nrels)
 -            
 -            if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]:
 -                if sectype == '.text':
 -                    n_text += 1
 -                continue
 -
 -            func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2]
 -            asm_n_text = asm_objfile.text_section_index(func + '_asm_start')
 -            source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0)
 -            if not source:
 -                if sectype == '.text':
 -                    n_text += 1
 -                continue
 -
 -            target_reltab = objfile.find_section('.rel' + sectype, n_text if sectype == '.text' else 0)
 -            target_reltaba = objfile.find_section('.rela' + sectype, n_text if sectype == '.text' else 0)
 -            for reltab in source.relocated_by:
 -                for rel in reltab.relocations:
 -                    rel.sym_index = asm_objfile.symtab.symbol_entries[rel.sym_index].new_index
 -                    # I suspect that this is requried for matching. If the after linking the
 -                    # binary doesn't match, retry after commenting out the following line:
 -                    rel.r_addend = 0
 -                    if sectype == '.rodata' and rel.r_offset in moved_late_rodata:
 -                        rel.r_offset = moved_late_rodata[rel.r_offset]
 -                new_data = b''.join(rel.to_bin() for rel in reltab.relocations)
 -                if reltab.sh_type == SHT_REL:
 -                    target_reltab = objfile.add_section('.rel' + sectype,
 -                            sh_type=SHT_REL, sh_flags=0,
 -                            sh_link=objfile.symtab.index, sh_info=target.index,
 -                            sh_addralign=4, sh_entsize=8, data=b'')
 -                    target_reltab.data += new_data
 -                else:
 -                    # Always append as a separate .rela.text section
 -                    target_reltaba = objfile.add_section('.rela' + sectype,
 -                            sh_type=SHT_RELA, sh_flags=0,
 -                            sh_link=objfile.symtab.index, sh_info=target.index,
 -                            sh_addralign=4, sh_entsize=12, data=b'')
 -                    target_reltaba.data += new_data
 -            if sectype == '.text':
 -                n_text += 1
 -
 -        objfile.write(objfile_name)
 -    finally:
 -        s_file.close()
 -        #os.remove(s_name)
 -        try:
 -            pass
 -            #os.remove(o_name)
 -        except:
 -            pass
 -
 -def run_wrapped(argv, outfile):
 -    parser = argparse.ArgumentParser(description="Pre-process .c files and post-process .o files to enable embedding assembly into C.")
 -    parser.add_argument('filename', help="path to .c code")
 -    parser.add_argument('--post-process', dest='objfile', help="path to .o file to post-process")
 -    parser.add_argument('--assembler', dest='assembler', help="assembler command (e.g. \"mips-linux-gnu-as -march=vr4300 -mabi=32\")")
 -    parser.add_argument('--asm-prelude', dest='asm_prelude', help="path to a file containing a prelude to the assembly file (with .set and .macro directives, e.g.)")
 -    parser.add_argument('--input-enc', default='latin1', help="Input encoding (default: latin1)")
 -    parser.add_argument('--output-enc', default='latin1', help="Output encoding (default: latin1)")
 -    parser.add_argument('-framepointer', dest='framepointer', action='store_true')
 -    parser.add_argument('-g3', dest='g3', action='store_true')
 -    group = parser.add_mutually_exclusive_group(required=False)
 -    group.add_argument('-O1', dest='opt', action='store_const', const='O1')
 -    group.add_argument('-O2', dest='opt', action='store_const', const='O2')
 -    group.add_argument('-g', dest='opt', action='store_const', const='g')
 -    args = parser.parse_args(argv)
 -    opt = args.opt
 -    if args.g3:
 -        if opt != 'O2':
 -            raise Failure("-g3 is only supported together with -O2")
 -        opt = 'g3'
 -
 -    if args.objfile is None:
 -        with open(args.filename, encoding=args.input_enc) as f:
 -            parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc, print_source=outfile)
 -    else:
 -        if args.assembler is None:
 -            raise Failure("must pass assembler command")
 -        with open(args.filename, encoding=args.input_enc) as f:
 -            functions = parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc)
 -        if not functions:
 -            return
 -        asm_prelude = b''
 -        if args.asm_prelude:
 -            with open(args.asm_prelude, 'rb') as f:
 -                asm_prelude = f.read()
 -        fixup_objfile(args.objfile, functions, asm_prelude, args.assembler, args.output_enc)
 -
 -def run(argv, outfile=sys.stdout.buffer):
 -    try:
 -        run_wrapped(argv, outfile)
 -    except Failure as e:
 -        sys.exit(1)
 -
 -if __name__ == "__main__":
 -    run(sys.argv[1:])
 +#!/usr/bin/env python3 +import argparse +import tempfile +import struct +import copy +import sys +import re +import os +from collections import namedtuple, defaultdict +from io import StringIO + +MAX_FN_SIZE = 100 +SLOW_CHECKS = False + +EI_NIDENT     = 16 +EI_CLASS      = 4 +EI_DATA       = 5 +EI_VERSION    = 6 +EI_OSABI      = 7 +EI_ABIVERSION = 8 +STN_UNDEF = 0 + +SHN_UNDEF     = 0 +SHN_ABS       = 0xfff1 +SHN_COMMON    = 0xfff2 +SHN_XINDEX    = 0xffff +SHN_LORESERVE = 0xff00 + +STT_NOTYPE  = 0 +STT_OBJECT  = 1 +STT_FUNC    = 2 +STT_SECTION = 3 +STT_FILE    = 4 +STT_COMMON  = 5 +STT_TLS     = 6 + +STB_LOCAL  = 0 +STB_GLOBAL = 1 +STB_WEAK   = 2 + +STV_DEFAULT   = 0 +STV_INTERNAL  = 1 +STV_HIDDEN    = 2 +STV_PROTECTED = 3 + +SHT_NULL          = 0 +SHT_PROGBITS      = 1 +SHT_SYMTAB        = 2 +SHT_STRTAB        = 3 +SHT_RELA          = 4 +SHT_HASH          = 5 +SHT_DYNAMIC       = 6 +SHT_NOTE          = 7 +SHT_NOBITS        = 8 +SHT_REL           = 9 +SHT_SHLIB         = 10 +SHT_DYNSYM        = 11 +SHT_INIT_ARRAY    = 14 +SHT_FINI_ARRAY    = 15 +SHT_PREINIT_ARRAY = 16 +SHT_GROUP         = 17 +SHT_SYMTAB_SHNDX  = 18 +SHT_MIPS_GPTAB    = 0x70000003 +SHT_MIPS_DEBUG    = 0x70000005 +SHT_MIPS_REGINFO  = 0x70000006 +SHT_MIPS_OPTIONS  = 0x7000000d + +SHF_WRITE            = 0x1 +SHF_ALLOC            = 0x2 +SHF_EXECINSTR        = 0x4 +SHF_MERGE            = 0x10 +SHF_STRINGS          = 0x20 +SHF_INFO_LINK        = 0x40 +SHF_LINK_ORDER       = 0x80 +SHF_OS_NONCONFORMING = 0x100 +SHF_GROUP            = 0x200 +SHF_TLS              = 0x400 + +R_MIPS_32   = 2 +R_MIPS_26   = 4 +R_MIPS_HI16 = 5 +R_MIPS_LO16 = 6 + + +class ElfHeader: +    """ +    typedef struct { +        unsigned char   e_ident[EI_NIDENT]; +        Elf32_Half      e_type; +        Elf32_Half      e_machine; +        Elf32_Word      e_version; +        Elf32_Addr      e_entry; +        Elf32_Off       e_phoff; +        Elf32_Off       e_shoff; +        Elf32_Word      e_flags; +        Elf32_Half      e_ehsize; +        Elf32_Half      e_phentsize; +        Elf32_Half      e_phnum; +        Elf32_Half      e_shentsize; +        Elf32_Half      e_shnum; +        Elf32_Half      e_shstrndx; +    } Elf32_Ehdr; +    """ + +    def __init__(self, data): +        self.e_ident = data[:EI_NIDENT] +        self.e_type, self.e_machine, self.e_version, self.e_entry, self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize, self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack('<HHIIIIIHHHHHH', data[EI_NIDENT:]) +        assert self.e_ident[EI_CLASS] == 1 # 32-bit +        #assert self.e_ident[EI_DATA] == 2 # big-endian +        #assert self.e_type == 1 # relocatable +        #assert self.e_machine == 8 # MIPS I Architecture +        assert self.e_phoff == 0 # no program header +        assert self.e_shoff != 0 # section header +        assert self.e_shstrndx != SHN_UNDEF + +    def to_bin(self): +        return self.e_ident + struct.pack('<HHIIIIIHHHHHH', self.e_type, +                self.e_machine, self.e_version, self.e_entry, self.e_phoff, +                self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize, +                self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx) + + +class Symbol: +    """ +    typedef struct { +        Elf32_Word      st_name; +        Elf32_Addr      st_value; +        Elf32_Word      st_size; +        unsigned char   st_info; +        unsigned char   st_other; +        Elf32_Half      st_shndx; +    } Elf32_Sym; +    """ + +    def __init__(self, data, strtab): +        self.st_name, self.st_value, self.st_size, st_info, self.st_other, self.st_shndx = struct.unpack('<IIIBBH', data) +        assert self.st_shndx != SHN_XINDEX, "too many sections (SHN_XINDEX not supported)" +        self.bind = st_info >> 4 +        self.type = st_info & 15 +        self.name = strtab.lookup_str(self.st_name) +        self.visibility = self.st_other & 3 + +    def to_bin(self): +        st_info = (self.bind << 4) | self.type +        return struct.pack('<IIIBBH', self.st_name, self.st_value, self.st_size, st_info, self.st_other, self.st_shndx) + + +class Relocation: +    def __init__(self, data, sh_type): +        self.sh_type = sh_type +        if sh_type == SHT_REL: +            self.r_offset, self.r_info = struct.unpack('<II', data) +        else: +            self.r_offset, self.r_info, self.r_addend = struct.unpack('<III', data) +        self.sym_index = self.r_info >> 8 +        self.rel_type = self.r_info & 0xff + +    def to_bin(self): +        self.r_info = (self.sym_index << 8) | self.rel_type +        if self.sh_type == SHT_REL: +            return struct.pack('<II', self.r_offset, self.r_info) +        else: +            return struct.pack('<III', self.r_offset, self.r_info, self.r_addend) + +class Section: +    """ +    typedef struct { +        Elf32_Word   sh_name; +        Elf32_Word   sh_type; +        Elf32_Word   sh_flags; +        Elf32_Addr   sh_addr; +        Elf32_Off    sh_offset; +        Elf32_Word   sh_size; +        Elf32_Word   sh_link; +        Elf32_Word   sh_info; +        Elf32_Word   sh_addralign; +        Elf32_Word   sh_entsize; +    } Elf32_Shdr; +    """ + +    def __init__(self, header, data, index): +        self.sh_name, self.sh_type, self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size, self.sh_link, self.sh_info, self.sh_addralign, self.sh_entsize = struct.unpack('<IIIIIIIIII', header) +        assert not self.sh_flags & SHF_LINK_ORDER +        if self.sh_entsize != 0: +            assert self.sh_size % self.sh_entsize == 0 +        if self.sh_type == SHT_NOBITS: +            self.data = '' +        else: +            self.data = data[self.sh_offset:self.sh_offset + self.sh_size] +        self.index = index +        self.relocated_by = [] + +    @staticmethod +    def from_parts(sh_name, sh_type, sh_flags, sh_link, sh_info, sh_addralign, sh_entsize, data, index): +        header = struct.pack('<IIIIIIIIII', sh_name, sh_type, sh_flags, 0, 0, len(data), sh_link, sh_info, sh_addralign, sh_entsize) +        return Section(header, data, index) + +    def lookup_str(self, index): +        assert self.sh_type == SHT_STRTAB +        to = self.data.find(b'\0', index) +        assert to != -1 +        return self.data[index:to].decode('latin1') + +    def add_str(self, string): +        assert self.sh_type == SHT_STRTAB +        ret = len(self.data) +        self.data += string.encode('latin1') + b'\0' +        return ret + +    def is_rel(self): +        return self.sh_type == SHT_REL or self.sh_type == SHT_RELA + +    def header_to_bin(self): +        if self.sh_type != SHT_NOBITS: +            self.sh_size = len(self.data) +        return struct.pack('<IIIIIIIIII', self.sh_name, self.sh_type, self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size, self.sh_link, self.sh_info, self.sh_addralign, self.sh_entsize) + +    def late_init(self, sections): +        if self.sh_type == SHT_SYMTAB: +            self.init_symbols(sections) +        elif self.is_rel(): +            self.rel_target = sections[self.sh_info] +            self.rel_target.relocated_by.append(self) +            self.init_relocs() + +    def find_symbol(self, name): +        assert self.sh_type == SHT_SYMTAB +        for s in self.symbol_entries: +            if s.name == name: +                return (s.st_shndx, s.st_value) +        return None + +    def find_symbol_in_section(self, name, section): +        pos = self.find_symbol(name) +        assert pos is not None +        assert pos[0] == section.index +        return pos[1] + +    def init_symbols(self, sections): +        assert self.sh_type == SHT_SYMTAB +        assert self.sh_entsize == 16 +        self.strtab = sections[self.sh_link] +        entries = [] +        for i in range(0, self.sh_size, self.sh_entsize): +            entries.append(Symbol(self.data[i:i+self.sh_entsize], self.strtab)) +        self.symbol_entries = entries + +    def init_relocs(self): +        assert self.is_rel() +        entries = [] +        for i in range(0, self.sh_size, self.sh_entsize): +            entries.append(Relocation(self.data[i:i+self.sh_entsize], self.sh_type)) +        self.relocations = entries + +    def local_symbols(self): +        assert self.sh_type == SHT_SYMTAB +        return self.symbol_entries[:self.sh_info] + +    def global_symbols(self): +        assert self.sh_type == SHT_SYMTAB +        return self.symbol_entries[self.sh_info:] + + +class ElfFile: +    def __init__(self, data): +        self.data = data +        assert data[:4] == b'\x7fELF', "not an ELF file" + +        self.elf_header = ElfHeader(data[0:52]) + +        offset, size = self.elf_header.e_shoff, self.elf_header.e_shentsize +        null_section = Section(data[offset:offset + size], data, 0) +        num_sections = self.elf_header.e_shnum or null_section.sh_size + +        self.sections = [null_section] +        for i in range(1, num_sections): +            ind = offset + i * size +            self.sections.append(Section(data[ind:ind + size], data, i)) + +        symtab = None +        for s in self.sections: +            if s.sh_type == SHT_SYMTAB: +                assert not symtab +                symtab = s +        assert symtab is not None +        self.symtab = symtab + +        shstr = self.sections[self.elf_header.e_shstrndx] +        for s in self.sections: +            s.name = shstr.lookup_str(s.sh_name) +            s.late_init(self.sections) + +    def find_section(self, name, num): +        i = 0 # Count how many sections of name `name` have been encountered so far, when i reaches `num` return that section +        for s in self.sections: +            if s.name == name and i == num: +                return s +            # Increment if section is a .text section +            if s.name == ".text": +                i += 1  +        return None + +    # Because Metrowerks for DS can make duplicate .text sections +    # for every function, we may need to lookup a specific .text area. +    def find_section_with_name(self, name, st_name): +        for s in self.sections: +            if s.name == name and s.sh_name == st_name: +                return s +        return None + +    # Return i, where i is the ith text section corresponding to the function  +    # called `name`. +    def text_section_index(self, name): +        st_shndx, _ = self.symtab.find_symbol(name) +        n_text = 0 +        for sec in self.sections: +            if sec.index == st_shndx: +                return n_text +            if sec.name =='.text': +                n_text += 1       +        return -1 + +    def add_section(self, name, sh_type, sh_flags, sh_link, sh_info, sh_addralign, sh_entsize, data): +        shstr = self.sections[self.elf_header.e_shstrndx] +        sh_name = shstr.add_str(name) +        s = Section.from_parts(sh_name=sh_name, sh_type=sh_type, +                sh_flags=sh_flags, sh_link=sh_link, sh_info=sh_info, +                sh_addralign=sh_addralign, sh_entsize=sh_entsize, data=data, +                index=len(self.sections)) +        self.sections.append(s) +        s.name = name +        s.late_init(self.sections) +        return s + +    def drop_irrelevant_sections(self): +        # We can only drop sections at the end, since otherwise section +        # references might be wrong. Luckily, these sections typically are. +        while self.sections[-1].sh_type in [SHT_MIPS_DEBUG, SHT_MIPS_GPTAB]: +            self.sections.pop() + +    def write(self, filename): +        outfile = open(filename, 'wb') +        outidx = 0 +        def write_out(data): +            nonlocal outidx +            outfile.write(data) +            outidx += len(data) +        def pad_out(align): +            if align and outidx % align: +                write_out(b'\0' * (align - outidx % align)) + +        self.elf_header.e_shnum = len(self.sections) +        write_out(self.elf_header.to_bin()) + +        for s in self.sections: +            if s.sh_type != SHT_NOBITS and s.sh_type != SHT_NULL: +                pad_out(s.sh_addralign) +                s.sh_offset = outidx +                write_out(s.data) + +        pad_out(4) +        self.elf_header.e_shoff = outidx +        for s in self.sections: +            write_out(s.header_to_bin()) + +        outfile.seek(0) +        outfile.write(self.elf_header.to_bin()) +        outfile.close() + + +def is_temp_name(name): +    return name.startswith('_asmpp_') + + +# https://stackoverflow.com/a/241506 +def re_comment_replacer(match): +    s = match.group(0) +    if s[0] in "/#": +        return " " +    else: +        return s + + +re_comment_or_string = re.compile( +    r'#.*|/\*.*?\*/|"(?:\\.|[^\\"])*"' +) + + +class Failure(Exception): +    def __init__(self, message): +        self.message = message + +    def __str__(self): +        return self.message + + +class GlobalState: +    def __init__(self, min_instr_count, skip_instr_count, use_jtbl_for_rodata): +        # A value that hopefully never appears as a 32-bit rodata constant (or we +        # miscompile late rodata). Increases by 1 in each step. +        self.late_rodata_hex = 0xE0123456 +        self.namectr = 0 +        self.min_instr_count = min_instr_count +        self.skip_instr_count = skip_instr_count +        self.use_jtbl_for_rodata = use_jtbl_for_rodata + +    def next_late_rodata_hex(self): +        dummy_bytes = struct.pack('<I', self.late_rodata_hex) +        if (self.late_rodata_hex & 0xffff) == 0: +            # Avoid lui +            self.late_rodata_hex += 1 +        self.late_rodata_hex += 1 +        return dummy_bytes + +    def make_name(self, cat): +        self.namectr += 1 +        return '_asmpp_{}{}'.format(cat, self.namectr) + + +Function = namedtuple('Function', ['text_glabels', 'asm_conts', 'late_rodata_dummy_bytes', 'jtbl_rodata_size', 'late_rodata_asm_conts', 'fn_desc', 'data']) + + +class GlobalAsmBlock: +    def __init__(self, fn_desc): +        self.fn_desc = fn_desc +        self.cur_section = '.text' +        self.asm_conts = [] +        self.late_rodata_asm_conts = [] +        self.late_rodata_alignment = 0 +        self.late_rodata_alignment_from_content = False +        self.text_glabels = [] +        self.fn_section_sizes = { +            '.text': 0, +            '.init': 0, +            '.data': 0, +            '.bss': 0, +            '.rodata': 0, +            '.sdata': 0, +            '.sdata2': 0, +            '.sbss': 0, +            #'.sbss2': 0, +            '.late_rodata': 0, +        } +        self.fn_ins_inds = [] +        self.glued_line = '' +        self.num_lines = 0 + +    def fail(self, message, line=None): +        context = self.fn_desc +        if line: +            context += ", at line \"" + line + "\"" +        raise Failure(message + "\nwithin " + context) + +    def count_quoted_size(self, line, z, real_line, output_enc): +        line = line.encode(output_enc).decode('latin1') +        in_quote = False +        num_parts = 0 +        ret = 0 +        i = 0 +        digits = "0123456789" # 0-7 would be more sane, but this matches GNU as +        while i < len(line): +            c = line[i] +            i += 1 +            if not in_quote: +                if c == '"': +                    in_quote = True +                    num_parts += 1 +            else: +                if c == '"': +                    in_quote = False +                    continue +                ret += 1 +                if c != '\\': +                    continue +                if i == len(line): +                    self.fail("backslash at end of line not supported", real_line) +                c = line[i] +                i += 1 +                # (if c is in "bfnrtv", we have a real escaped literal) +                if c == 'x': +                    # hex literal, consume any number of hex chars, possibly none +                    while i < len(line) and line[i] in digits + "abcdefABCDEF": +                        i += 1 +                elif c in digits: +                    # octal literal, consume up to two more digits +                    it = 0 +                    while i < len(line) and line[i] in digits and it < 2: +                        i += 1 +                        it += 1 + +        if in_quote: +            self.fail("unterminated string literal", real_line) +        if num_parts == 0: +            self.fail(".ascii with no string", real_line) +        return ret + num_parts if z else ret + + +    def align4(self): +        while self.fn_section_sizes[self.cur_section] % 2 != 0: +            self.fn_section_sizes[self.cur_section] += 1 + +    def add_sized(self, size, line): +        if self.cur_section in ['.text', '.init', '.late_rodata']: +            if size % 2 != 0: +                self.fail("size must be a multiple of 2 or 4", line) +        if size < 0: +            self.fail("size cannot be negative", line) +        self.fn_section_sizes[self.cur_section] += size +        if self.cur_section in ['.text', '.init']: +            if not self.text_glabels: +                self.fail(".text or .init block without an initial glabel", line) +            self.fn_ins_inds.append((self.num_lines - 1, size // 2)) + +    def process_line(self, line, output_enc): +        self.num_lines += 1 +        if line.endswith('\\'): +            self.glued_line += line[:-1] +            return +        line = self.glued_line + line +        self.glued_line = '' + +        real_line = line +        line = re.sub(re_comment_or_string, re_comment_replacer, line) +        line = line.strip() +        line = re.sub(r'^[a-zA-Z0-9_]+:\s*', '', line) +        changed_section = False +        emitting_double = False +        if line.startswith('glabel ') and self.cur_section in ['.text', '.init']: +            self.text_glabels.append(line.split()[1]) +        if not line: +            pass # empty line +        elif line.startswith('glabel ') or (' ' not in line and line.endswith(':')): +            pass # label +        elif line.startswith('.section') or line in ['.text', '.init', '.data', '.rdata', '.rodata', '.sdata', '.sdata2', '.bss','.sbss', '.late_rodata']: +            # section change +            self.cur_section = '.rodata' if line == '.rdata' else line.split(',')[0].split()[-1] +            if self.cur_section not in ['.data', '.text', '.init', '.rodata', '.sdata', '.sdata2', '.late_rodata', '.bss', '.sbss']: +                self.fail("unrecognized .section directive", real_line) +            changed_section = True +        elif line.startswith('.late_rodata_alignment'): +            if self.cur_section != '.late_rodata': +                self.fail(".late_rodata_alignment must occur within .late_rodata section", real_line) +            value = int(line.split()[1]) +            if value not in [4, 8]: +                self.fail(".late_rodata_alignment argument must be 4 or 8", real_line) +            if self.late_rodata_alignment and self.late_rodata_alignment != value: +                self.fail(".late_rodata_alignment alignment assumption conflicts with earlier .double directive. Make sure to provide explicit alignment padding.") +            self.late_rodata_alignment = value +            changed_section = True +        elif line.startswith('.incbin'): +            self.add_sized(int(line.split(',')[-1].strip(), 0), real_line) +        elif line.startswith('.skip'): +            self.add_sized(int(line.split()[-1].strip(), 0), real_line) +        elif line.startswith('.long') or line.startswith('.float'): +            self.align4() +            self.add_sized(4 * len(line.split(',')), real_line) +        elif line.startswith('.double'): +            self.align4() +            if self.cur_section == '.late_rodata': +                align8 = self.fn_section_sizes[self.cur_section] % 8 +                # Automatically set late_rodata_alignment, so the generated C code uses doubles. +                # This gives us correct alignment for the transferred doubles even when the +                # late_rodata_alignment is wrong, e.g. for non-matching compilation. +                if not self.late_rodata_alignment: +                    self.late_rodata_alignment = 8 - align8 +                    self.late_rodata_alignment_from_content = True +                elif self.late_rodata_alignment != 8 - align8: +                    if self.late_rodata_alignment_from_content: +                        self.fail("found two .double directives with different start addresses mod 8. Make sure to provide explicit alignment padding.", real_line) +                    else: +                        self.fail(".double at address that is not 0 mod 8 (based on .late_rodata_alignment assumption). Make sure to provide explicit alignment padding.", real_line) +            self.add_sized(8 * len(line.split(',')), real_line) +            emitting_double = True +        elif line.startswith('.space'): +            self.add_sized(int(line.split()[1], 0), real_line) +        elif line.startswith('.balign') or line.startswith('.align'): +            align = int(line.split()[1]) +            if align != 4:  +                self.fail("only .balign 4 is supported", real_line) +            self.align4() +        elif line.startswith('.asci'): +            z = (line.startswith('.asciz') or line.startswith('.asciiz')) +            self.add_sized(self.count_quoted_size(line, z, real_line, output_enc), real_line) +        elif line.startswith('.byte'): +            self.add_sized(len(line.split(',')), real_line) +        # Branches are 4 bytes long +        elif line.startswith('bl'): +            self.add_sized(4, real_line) +        else: +            # Unfortunately, macros are hard to support for .rodata -- +            # we don't know how how space they will expand to before +            # running the assembler, but we need that information to +            # construct the C code. So if we need that we'll either +            # need to run the assembler twice (at least in some rare +            # cases), or change how this program is invoked. +            # Similarly, we can't currently deal with pseudo-instructions +            # that expand to several real instructions. +            if self.cur_section != '.text' and self.cur_section != '.init': +                self.fail("instruction or macro call in non-.text/.init section? not supported", real_line) +            self.add_sized(2, real_line) +        if self.cur_section == '.late_rodata': +            if not changed_section: +                if emitting_double: +                    self.late_rodata_asm_conts.append(".align 0") +                self.late_rodata_asm_conts.append(real_line) +                if emitting_double: +                    self.late_rodata_asm_conts.append(".align 2") +        else: +            self.asm_conts.append(real_line) + +    def finish(self, state): +        src = [''] * (self.num_lines + 1) +        late_rodata_dummy_bytes = [] +        jtbl_rodata_size = 0 +        late_rodata_fn_output = [] + +        num_instr = self.fn_section_sizes['.text'] // 2 + +        if self.fn_section_sizes['.late_rodata'] > 0: +            # Generate late rodata by emitting unique float constants. +            # This requires 3 instructions for each 4 bytes of rodata. +            # If we know alignment, we can use doubles, which give 3 +            # instructions for 8 bytes of rodata. +            size = self.fn_section_sizes['.late_rodata'] // 2 +            skip_next = False +            needs_double = (self.late_rodata_alignment != 0) +            for i in range(size): +                if skip_next: +                    skip_next = False +                    continue +                # Jump tables give 9 instructions for >= 5 words of rodata, and should be +                # emitted when: +                # - -O2 or -O2 -g3 are used, which give the right codegen +                # - we have emitted our first .float/.double (to ensure that we find the +                #   created rodata in the binary) +                # - we have emitted our first .double, if any (to ensure alignment of doubles +                #   in shifted rodata sections) +                # - we have at least 5 words of rodata left to emit (otherwise IDO does not +                #   generate a jump table) +                # - we have at least 10 more instructions to go in this function (otherwise our +                #   function size computation will be wrong since the delay slot goes unused) +                if (not needs_double and state.use_jtbl_for_rodata and i >= 1 and +                        size - i >= 5 and num_instr - len(late_rodata_fn_output) >= 10): +                    cases = " ".join("case {}:".format(case) for case in range(size - i)) +                    late_rodata_fn_output.append("switch (*(volatile int*)0) { " + cases + " ; }") +                    late_rodata_fn_output.extend([""] * 8) +                    jtbl_rodata_size = (size - i) * 4 +                    break +                dummy_bytes = state.next_late_rodata_hex() +                late_rodata_dummy_bytes.append(dummy_bytes) +                if self.late_rodata_alignment == 4 * ((i + 1) % 2 + 1) and i + 1 < size: +                    dummy_bytes2 = state.next_late_rodata_hex() +                    late_rodata_dummy_bytes.append(dummy_bytes2) +                    fval, = struct.unpack('<d', dummy_bytes + dummy_bytes2) +                    late_rodata_fn_output.append('*(volatile double*)0 = {};'.format(fval)) +                    skip_next = True +                    needs_double = True +                else: +                    fval, = struct.unpack('<f', dummy_bytes) +                    late_rodata_fn_output.append('*(volatile float*)0 = {}f;'.format(fval)) +                late_rodata_fn_output.append('') +                late_rodata_fn_output.append('') + +        text_name = None +        if self.fn_section_sizes['.text'] > 0 or late_rodata_fn_output: +            text_name = state.make_name('func') +            src[0] = 'int {}(void) {{ return '.format(text_name) +            instr_count = self.fn_section_sizes['.text'] // 2 +            src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count > 1 else '; }; ' +            if instr_count < state.min_instr_count: +                self.fail("too short .text block") +            tot_emitted = 0 +            tot_skipped = 0 +            fn_emitted = 0 +            fn_skipped = 0 +            rodata_stack = late_rodata_fn_output[::-1] +            for (line, count) in self.fn_ins_inds: +                for _ in range(count): +                    if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and +                            (not rodata_stack or rodata_stack[-1])): +                        # Don't let functions become too large. When a function reaches 284 +                        # instructions, and -O2 -framepointer flags are passed, the IRIX +                        # compiler decides it is a great idea to start optimizing more. +                        fn_emitted = 0 +                        fn_skipped = 0 +                        src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func')) +                    if fn_skipped < state.skip_instr_count: +                        fn_skipped += 1 +                        tot_skipped += 1 +                    elif rodata_stack: +                        src[line] += rodata_stack.pop() +                    else: +                        src[line] += '*(int *)' +                    tot_emitted += 1 +                    fn_emitted += 1 +            if rodata_stack: +                size = len(late_rodata_fn_output) // 3 +                available = instr_count - tot_skipped +                self.fail( +                    "late rodata to text ratio is too high: {} / {} must be <= 1/3\n" +                    "add .late_rodata_alignment (4|8) to the .late_rodata " +                    "block to double the allowed ratio." +                        .format(size, available)) + +        init_name = None +        if self.fn_section_sizes['.init'] > 0 or late_rodata_fn_output: +            init_name = state.make_name('func') +            src[0] = 'int {}(void) {{ return '.format(init_name) +            instr_count = self.fn_section_sizes['.init'] // 2 +            src[self.num_lines] = '((volatile void *) 0); }; ' if instr_count else '; }; ' +            if instr_count < state.min_instr_count: +                self.fail("too short .init block") +            tot_emitted = 0 +            tot_skipped = 0 +            fn_emitted = 0 +            fn_skipped = 0 +            rodata_stack = late_rodata_fn_output[::-1] +            for (line, count) in self.fn_ins_inds: +                for _ in range(count): +                    if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and +                            (not rodata_stack or rodata_stack[-1])): +                        # Don't let functions become too large. When a function reaches 284 +                        # instructions, and -O2 -framepointer flags are passed, the IRIX +                        # compiler decides it is a great idea to start optimizing more. +                        fn_emitted = 0 +                        fn_skipped = 0 +                        src[line] += '((volatile void *) 0); }} int {}(void) {{ return '.format(state.make_name('large_func')) +                    if fn_skipped < state.skip_instr_count: +                        fn_skipped += 1 +                        tot_skipped += 1 +                    elif rodata_stack: +                        src[line] += rodata_stack.pop() +                    else: +                        src[line] += '*(int *)' +                    tot_emitted += 1 +                    fn_emitted += 1 +            if rodata_stack: +                size = len(late_rodata_fn_output) // 3 +                available = instr_count - tot_skipped +                self.fail( +                    "late rodata to init ratio is too high: {} / {} must be <= 1/3\n" +                    "add .late_rodata_alignment (4|8) to the .late_rodata " +                    "block to double the allowed ratio." +                        .format(size, available)) + +        rodata_name = None +        if self.fn_section_sizes['.rodata'] > 0: +            rodata_name = state.make_name('rodata') +            src[self.num_lines] += f" const char {rodata_name}[{self.fn_section_sizes['.rodata']}] = {{1}};" + +        data_name = None +        if self.fn_section_sizes['.data'] > 0: +            data_name = state.make_name('data') +            src[self.num_lines] += f" char {data_name}[{self.fn_section_sizes['.data']}] = {{1}};" + +        bss_name = None +        if self.fn_section_sizes['.bss'] > 0: +            bss_name = state.make_name('bss') +            src[self.num_lines] += f" char {bss_name}[{self.fn_section_sizes['.bss']}];" + +        sdata_name = None # sdata is like data but small +        if self.fn_section_sizes['.sdata'] > 0: +            sdata_code = "" +            for i in range(self.fn_section_sizes['.sdata']): +                sdata_name = state.make_name('sdata') +                sdata_code += f" char {sdata_name} = 1;" +            src[self.num_lines] += sdata_code + +        sdata2_name = None # sdata2 is like rodata but small +        if self.fn_section_sizes['.sdata2'] > 0: +            sdata2_code = "" +            for i in range(self.fn_section_sizes['.sdata2']): +                sdata2_name = state.make_name('sdata2') +                sdata2_code += f" const char {sdata2_name} = 1;" +            src[self.num_lines] += sdata2_code + +        sbss_name = None # Similarly, sbss is like uninitialized data but small +        if self.fn_section_sizes['.sbss'] > 0: +            sbss_code = "" +            for i in range(self.fn_section_sizes['.sbss']): +                sbss_name = state.make_name('sbss') +                sbss_code += f" char {sbss_name};" +            src[self.num_lines] += sbss_code + +        """ sbss2 is currently borked +        sbss2_name = None # Similarly, sbss2 is like uninitialized rodata but small +        if self.fn_section_sizes['.sbss2'] > 0: +            sbss2_code = "" +            for i in range(self.fn_section_sizes['.sbss2']): +                sbss2_name = state.make_name('sbss2') +                sbss2_code += f" const char {sbss2_name};" +            src[self.num_lines] += sbss2_code +        """ + +        fn = Function( +                text_glabels=self.text_glabels, +                asm_conts=self.asm_conts, +                late_rodata_dummy_bytes=late_rodata_dummy_bytes, +                jtbl_rodata_size=jtbl_rodata_size, +                late_rodata_asm_conts=self.late_rodata_asm_conts, +                fn_desc=self.fn_desc, +                data={ +                    '.text': (text_name, self.fn_section_sizes['.text']), +                    '.data': (data_name, self.fn_section_sizes['.data']), +                    '.rodata': (rodata_name, self.fn_section_sizes['.rodata']), +                    '.bss': (bss_name, self.fn_section_sizes['.bss']), +                    '.sdata': (sdata_name, self.fn_section_sizes['.sdata']), +                    '.sdata2': (sdata2_name, self.fn_section_sizes['.sdata2']), +                    '.sbss': (sbss_name, self.fn_section_sizes['.sbss']), +                    #'.sbss2': (sbss2_name, self.fn_section_sizes['.sbss2']), +                }) +        return src, fn + +cutscene_data_regexpr = re.compile(r"CutsceneData (.|\n)*\[\] = {") +float_regexpr = re.compile(r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f") + +def repl_float_hex(m): +    return str(struct.unpack("<I", struct.pack("<f", float(m.group(0).strip().rstrip("f"))))[0]) + +def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None): +    opt = "O4" +    min_instr_count = 3 # idk +    skip_instr_count = 2 # idk + +    use_jtbl_for_rodata = False +    if opt in ['O2', 'g3'] and not framepointer: +        use_jtbl_for_rodata = True + +    state = GlobalState(min_instr_count, skip_instr_count, use_jtbl_for_rodata) + +    global_asm = None +    asm_functions = [] +    output_lines = [] + +    is_cutscene_data = False + +    for line_no, raw_line in enumerate(f, 1): +        raw_line = raw_line.rstrip() +        line = raw_line.lstrip() + +        # Print exactly one output line per source line, to make compiler +        # errors have correct line numbers. These will be overridden with +        # reasonable content further down. +        output_lines.append('') +        if global_asm is not None: +            if line.startswith(')'): +                src, fn = global_asm.finish(state) +                for i, line2 in enumerate(src): +                    output_lines[start_index + i] = line2 +                asm_functions.append(fn) +                global_asm = None +            else: +                global_asm.process_line(raw_line, output_enc) +        else: +            if line in ['GLOBAL_ASM(', '#pragma GLOBAL_ASM(']: +                global_asm = GlobalAsmBlock("GLOBAL_ASM block at line " + str(line_no)) +                start_index = len(output_lines) +            elif ((line.startswith('GLOBAL_ASM("') or line.startswith('#pragma GLOBAL_ASM("')) +                    and line.endswith('")')): +                fname = line[line.index('(') + 2 : -2] +                global_asm = GlobalAsmBlock(fname) +                with open(fname, encoding=input_enc) as f: +                    for line2 in f: +                        global_asm.process_line(line2.rstrip(), output_enc) +                src, fn = global_asm.finish(state) +                output_lines[-1] = ''.join(src) +                asm_functions.append(fn) +                global_asm = None +            elif ((line.startswith('#include "')) and line.endswith('" EARLY')): +                # C includes qualified with EARLY (i.e. #include "file.c" EARLY) will be +                # processed recursively when encountered +                fpath = os.path.dirname(f.name) +                fname = line[line.index(' ') + 2 : -7] +                include_src = StringIO() +                with open(fpath + os.path.sep + fname, encoding=input_enc) as include_file: +                    parse_source(include_file, opt, framepointer, input_enc, output_enc, include_src) +                output_lines[-1] = include_src.getvalue() +                include_src.write('#line ' + str(line_no) + '\n') +                include_src.close() +            else: +                # This is a hack to replace all floating-point numbers in an array of a particular type +                # (in this case CutsceneData) with their corresponding IEEE-754 hexadecimal representation +                if cutscene_data_regexpr.search(line) is not None: +                    is_cutscene_data = True +                elif line.endswith("};"): +                    is_cutscene_data = False +                if is_cutscene_data: +                    raw_line = re.sub(float_regexpr, repl_float_hex, raw_line) +                output_lines[-1] = raw_line + +    if print_source: +        if isinstance(print_source, StringIO): +            for line in output_lines: +                print_source.write(line + '\n') +        else: +            for line in output_lines: +                print_source.write(line.encode(output_enc) + b'\n') +            print_source.flush() +            if print_source != sys.stdout.buffer: +                print_source.close() +    out_file = open("output.txt", 'w') +    out_file.write(str(asm_functions)) +    out_file.close() +    return asm_functions + +# Return the function name in objfile corresponding to function +# `asm_func_name` in asm_objfile. `to_copy` is the dictionary of the +# same name in fix_objfile(). +def convert_func_name(asm_func_name, to_copy): +    for sec_name, func_data in to_copy.items(): +        if func_data and func_data[0][4] == asm_func_name: +            return func_data[0][2] +    return '' + +def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc): +    SECTIONS = ['.data'] +    SECTIONS.extend(['.text' for i in range(0,len(functions))]) +    SECTIONS.extend(['.rodata', '.bss', '.sdata', '.sdata2', '.sbss']) + +    with open(objfile_name, 'rb') as f: +        objfile = ElfFile(f.read()) + +    prev_locs = defaultdict(int) +    to_copy = defaultdict(list)  + +    asm = [] +    all_late_rodata_dummy_bytes = [] +    all_jtbl_rodata_size = [] +    late_rodata_asm = [] +    late_rodata_source_name_start = None +    late_rodata_source_name_end = None + +    # Generate an assembly file with all the assembly we need to fill in. For +    # simplicity we pad with nops/.space so that addresses match exactly, so we +    # don't have to fix up relocations/symbol references. +    all_text_glabels = set() +    for function in functions: +        ifdefed = False +        for sectype, (temp_name, size) in function.data.items(): +            if temp_name is None: +                continue +            assert size > 0 +            n_text = objfile.text_section_index(temp_name) +            loc = objfile.symtab.find_symbol(temp_name) +            if loc is None: +                ifdefed = True +                break +            loc = loc[1] +            prev_loc = prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] +            if loc < prev_loc: +                raise Failure("Wrongly computed size for section {} (diff {}). This is an asm-processor bug!".format(sectype + (str(n_text) if sectype == '.text' else ''), prev_loc- loc)) +            if loc != prev_loc: +                asm.append('.section ' + sectype) +                if sectype == '.text': +                    for i in range((loc - prev_loc) // 2): +                        asm.append('nop') +                else: +                    asm.append('.space {}'.format(loc - prev_loc)) +            to_copy[sectype + (str(n_text) if sectype == '.text' else '')].append((loc, size, temp_name, function.fn_desc, function.text_glabels[0])) +            prev_locs[sectype + (str(n_text) if sectype == '.text' else '')] = loc + size +        if not ifdefed: +            all_text_glabels.update(function.text_glabels) +            all_late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes) +            all_jtbl_rodata_size.append(function.jtbl_rodata_size) +            late_rodata_asm.append(function.late_rodata_asm_conts) +            for sectype, (temp_name, size) in function.data.items(): +                if temp_name is not None: +                    asm.append('.section ' + sectype) +                    asm.append('glabel ' + temp_name + '_asm_start') +            asm.append('.section .text') +            for line in function.asm_conts: +                asm.append(line) +            for sectype, (temp_name, size) in function.data.items(): +                if temp_name is not None: +                    #asm.append('.section ' + sectype) +                    asm.append('glabel ' + temp_name + '_asm_end') + +    if any(late_rodata_asm): +        late_rodata_source_name_start = '_asmpp_late_rodata_start' +        late_rodata_source_name_end = '_asmpp_late_rodata_end' +        asm.append('.rdata') +        asm.append('glabel {}'.format(late_rodata_source_name_start)) +        for conts in late_rodata_asm: +            asm.extend(conts) +        asm.append('glabel {}'.format(late_rodata_source_name_end)) + +    o_file = open("asm_processor_temp.o", 'w').close() # Create temp file. tempfile module isn't working for me. +    o_name = "asm_processor_temp.o" + +    s_file = open("asm_processor_temp.s", 'wb') # Ditto. +    s_name = "asm_processor_temp.s" +    try: +        s_file.write(asm_prelude + b'\n') +        for line in asm: +            s_file.write(line.encode(output_enc) + b'\n') +        s_file.close() +        ret = os.system(assembler + " " + s_name + " -o " + o_name) +        if ret != 0: +            raise Failure("failed to assemble") +        with open(o_name, 'rb') as f: +            asm_objfile = ElfFile(f.read()) + +        # Remove some clutter from objdump output +        objfile.drop_irrelevant_sections() + +        """ +        # Unify reginfo sections +        target_reginfo = objfile.find_section('.reginfo') +        source_reginfo_data = list(asm_objfile.find_section('.reginfo').data) +        data = list(target_reginfo.data) +        for i in range(20): +            data[i] |= source_reginfo_data[i] +        target_reginfo.data = bytes(data) +        """ + +        # Move over section contents +        modified_text_positions = set() +        jtbl_rodata_positions = set() +        last_rodata_pos = 0 +        n_text = 0 +        for sec in objfile.sections: +            sectype = sec.name +            if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: +                if sectype == '.text': +                    n_text += 1 +                continue +            # This should work as long as you NONMATCH whole functions rather than asm fragments +            func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2] +            asm_n_text = asm_objfile.text_section_index(func + '_asm_start') +            source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) +            assert source is not None, "didn't find source section: " + sectype +            for (pos, count, temp_name, fn_desc, fn_name) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: +                loc1 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_start', source) +                loc2 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_end', source) +                assert loc1 == pos, "assembly and C files don't line up for section " + sectype + ", " + fn_desc +                # Since we are nonmatching whole functions, we don't need to insert the correct +                # amount of padding into the src file. We don't actually need to insert padding   +                # at all. We can just plop the asm's text section into the objfile.    +                # if loc2 - loc1 != count: +                #     raise Failure("incorrectly computed size for section " + sectype + ", " + fn_desc + ". If using .double, make sure to provide explicit alignment padding.") +            if sectype == '.bss' or sectype == '.sbss2': +                continue +            target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) +            assert target is not None, "missing target section of type " + sectype +            data = list(target.data) +            for (pos, count, _, _, _) in to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: +                # mwasmarm 4-aligns text sections, so make sure to copy exactly `count` bytes +                data[pos:pos + count] = source.data[pos:pos + count] +                if sectype == '.text': +                    assert count % 2 == 0 +                    assert pos % 2 == 0 +                    for i in range(count // 2): +                        modified_text_positions.add(pos + 2 * i) +                elif sectype == '.rodata': +                    last_rodata_pos = pos + count +            target.data = bytes(data) +            if sectype == '.text': +                n_text += 1 + +        # Move over late rodata. This is heuristic, sadly, since I can't think +        # of another way of doing it. +        moved_late_rodata = {} +        if any(all_late_rodata_dummy_bytes) or any(all_jtbl_rodata_size): +            source = asm_objfile.find_section('.rodata', 0) +            target = objfile.find_section('.rodata', 0) +            source_pos = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_start, source) +            source_end = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_end, source) +            if source_end - source_pos != sum(map(len, all_late_rodata_dummy_bytes)) * 2 + sum(all_jtbl_rodata_size): +                raise Failure("computed wrong size of .late_rodata") +            new_data = list(target.data) +            for dummy_bytes_list, jtbl_rodata_size in zip(all_late_rodata_dummy_bytes, all_jtbl_rodata_size): +                for index, dummy_bytes in enumerate(dummy_bytes_list): +                    pos = target.data.index(dummy_bytes, last_rodata_pos) +                    # This check is nice, but makes time complexity worse for large files: +                    if SLOW_CHECKS and target.data.find(dummy_bytes, pos + 2) != -1: +                        raise Failure("multiple occurrences of late_rodata hex magic. Change asm-processor to use something better than 0xE0123456!") +                    if index == 0 and len(dummy_bytes_list) > 1 and target.data[pos+2:pos+8] == b'\0\0\0\0': +                        # Ugly hack to handle double alignment for non-matching builds. +                        # We were told by .late_rodata_alignment (or deduced from a .double) +                        # that a function's late_rodata started out 4 (mod 8), and emitted +                        # a float and then a double. But it was actually 0 (mod 8), so our +                        # double was moved by 4 bytes. To make them adjacent to keep jump +                        # tables correct, move the float by 4 bytes as well. +                        new_data[pos:pos+2] = b'\0\0\0\0' +                        pos += 2 +                    new_data[pos:pos+2] = source.data[source_pos:source_pos+2] +                    moved_late_rodata[source_pos] = pos +                    last_rodata_pos = pos + 2 +                    source_pos += 2 +                if jtbl_rodata_size > 0: +                    assert dummy_bytes_list, "should always have dummy bytes before jtbl data" +                    pos = last_rodata_pos +                    new_data[pos : pos + jtbl_rodata_size] = \ +                        source.data[source_pos : source_pos + jtbl_rodata_size] +                    for i in range(0, jtbl_rodata_size, 2): +                        moved_late_rodata[source_pos + i] = pos + i +                        jtbl_rodata_positions.add(pos + i) +                    last_rodata_pos += jtbl_rodata_size +                    source_pos += jtbl_rodata_size +            target.data = bytes(new_data) + +        # Merge strtab data. +        strtab_adj = len(objfile.symtab.strtab.data) +        objfile.symtab.strtab.data += asm_objfile.symtab.strtab.data + +        # Find relocated symbols +        relocated_symbols = set() +        for obj in [asm_objfile, objfile]: +            for sec in obj.sections: +                for reltab in sec.relocated_by: +                    for rel in reltab.relocations: +                        relocated_symbols.add(obj.symtab.symbol_entries[rel.sym_index]) + +        # Move over symbols, deleting the temporary function labels. +        # Sometimes this naive procedure results in duplicate symbols, or UNDEF +        # symbols that are also defined the same .o file. Hopefully that's fine. +        # Skip over local symbols that aren't used relocated against, to avoid +        # conflicts. +        new_local_syms = [s for s in objfile.symtab.local_symbols() if not is_temp_name(s.name)] +        new_global_syms = [s for s in objfile.symtab.global_symbols() if not is_temp_name(s.name)] +        n_text = 0 +        for i, s in enumerate(asm_objfile.symtab.symbol_entries): +            is_local = (i < asm_objfile.symtab.sh_info) +            if is_local and s not in relocated_symbols: +                continue +            if is_temp_name(s.name): +                continue +            if s.st_shndx not in [SHN_UNDEF, SHN_ABS]: +                section_name = asm_objfile.sections[s.st_shndx].name +                if section_name not in SECTIONS: +                    raise Failure("generated assembly .o must only have symbols for .text, .data, .rodata, .sdata, .sdata2, .sbss, ABS and UNDEF, but found " + section_name) +                if section_name == '.sbss2': #! I'm not sure why this isn't working +                    continue +                obj_func_name = convert_func_name(s.name, to_copy) +                obj_n_text = objfile.text_section_index(obj_func_name)                 +                s.st_shndx = objfile.find_section(section_name, obj_n_text if section_name == '.text' else 0).index +                if section_name == '.text': +                    n_text += 1 +                # glabel's aren't marked as functions, making objdump output confusing. Fix that. +                if s.name in all_text_glabels: +                    s.type = STT_FUNC +                if objfile.sections[s.st_shndx].name == '.rodata' and s.st_value in moved_late_rodata: +                    s.st_value = moved_late_rodata[s.st_value] +            s.st_name += strtab_adj +            if is_local: +                new_local_syms.append(s) +            else: +                new_global_syms.append(s) +        new_syms = new_local_syms + new_global_syms +        for i, s in enumerate(new_syms): +            s.new_index = i +        objfile.symtab.data = b''.join(s.to_bin() for s in new_syms) +        objfile.symtab.sh_info = len(new_local_syms) + +        # Move over relocations +        n_text = 0 +        for sec in objfile.sections: +            sectype = sec.name +            # This should work as long as you NONMATCH whole functions rather than asm fragments +            target = objfile.find_section(sectype, n_text if sectype == '.text' else 0) + +            if target is not None: +                # fixup relocation symbol indices, since we butchered them above +                for reltab in target.relocated_by: +                    nrels = [] +                    for rel in reltab.relocations: +                        if (sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions) or sectype == ".sbss2": +                            # don't include relocations for late_rodata dummy code +                            continue +                        # hopefully we don't have relocations for local or +                        # temporary symbols, so new_index exists +                        rel.sym_index = objfile.symtab.symbol_entries[rel.sym_index].new_index +                        nrels.append(rel) +                    reltab.relocations = nrels +                    reltab.data = b''.join(rel.to_bin() for rel in nrels) +             +            if not to_copy[sectype + (str(n_text) if sectype == '.text' else '')]: +                if sectype == '.text': +                    n_text += 1 +                continue + +            func = to_copy[sectype + str(n_text) if sectype == '.text' else ''][0][2] +            asm_n_text = asm_objfile.text_section_index(func + '_asm_start') +            source = asm_objfile.find_section(sectype, asm_n_text if sectype == '.text' else 0) +            if not source: +                if sectype == '.text': +                    n_text += 1 +                continue + +            target_reltab = objfile.find_section('.rel' + sectype, n_text if sectype == '.text' else 0) +            target_reltaba = objfile.find_section('.rela' + sectype, n_text if sectype == '.text' else 0) +            for reltab in source.relocated_by: +                for rel in reltab.relocations: +                    rel.sym_index = asm_objfile.symtab.symbol_entries[rel.sym_index].new_index +                    if sectype == '.rodata' and rel.r_offset in moved_late_rodata: +                        rel.r_offset = moved_late_rodata[rel.r_offset] +                new_data = b''.join(rel.to_bin() for rel in reltab.relocations) +                if reltab.sh_type == SHT_REL: +                    target_reltab = objfile.add_section('.rel' + sectype, +                            sh_type=SHT_REL, sh_flags=0, +                            sh_link=objfile.symtab.index, sh_info=target.index, +                            sh_addralign=4, sh_entsize=8, data=b'') +                    target_reltab.data += new_data +                else: +                    # Always append as a separate .rela.text section +                    target_reltaba = objfile.add_section('.rela' + sectype, +                            sh_type=SHT_RELA, sh_flags=0, +                            sh_link=objfile.symtab.index, sh_info=target.index, +                            sh_addralign=4, sh_entsize=12, data=b'') +                    target_reltaba.data += new_data +            if sectype == '.text': +                n_text += 1 + +        objfile.write(objfile_name) +    finally: +        s_file.close() +        #os.remove(s_name) +        try: +            pass +            #os.remove(o_name) +        except: +            pass + +def run_wrapped(argv, outfile): +    parser = argparse.ArgumentParser(description="Pre-process .c files and post-process .o files to enable embedding assembly into C.") +    parser.add_argument('filename', help="path to .c code") +    parser.add_argument('--post-process', dest='objfile', help="path to .o file to post-process") +    parser.add_argument('--assembler', dest='assembler', help="assembler command (e.g. \"mips-linux-gnu-as -march=vr4300 -mabi=32\")") +    parser.add_argument('--asm-prelude', dest='asm_prelude', help="path to a file containing a prelude to the assembly file (with .set and .macro directives, e.g.)") +    parser.add_argument('--input-enc', default='latin1', help="Input encoding (default: latin1)") +    parser.add_argument('--output-enc', default='latin1', help="Output encoding (default: latin1)") +    parser.add_argument('-framepointer', dest='framepointer', action='store_true') +    parser.add_argument('-g3', dest='g3', action='store_true') +    group = parser.add_mutually_exclusive_group(required=False) +    group.add_argument('-O1', dest='opt', action='store_const', const='O1') +    group.add_argument('-O2', dest='opt', action='store_const', const='O2') +    group.add_argument('-g', dest='opt', action='store_const', const='g') +    args = parser.parse_args(argv) +    opt = args.opt +    if args.g3: +        if opt != 'O2': +            raise Failure("-g3 is only supported together with -O2") +        opt = 'g3' + +    if args.objfile is None: +        with open(args.filename, encoding=args.input_enc) as f: +            parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc, print_source=outfile) +    else: +        if args.assembler is None: +            raise Failure("must pass assembler command") +        with open(args.filename, encoding=args.input_enc) as f: +            functions = parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc) +        if not functions: +            return +        asm_prelude = b'' +        if args.asm_prelude: +            with open(args.asm_prelude, 'rb') as f: +                asm_prelude = f.read() +        fixup_objfile(args.objfile, functions, asm_prelude, args.assembler, args.output_enc) + +def run(argv, outfile=sys.stdout.buffer): +    try: +        run_wrapped(argv, outfile) +    except Failure as e: +        print("Error:", e, file=sys.stderr) +        sys.exit(1) + +if __name__ == "__main__": +    run(sys.argv[1:]) diff --git a/tools/asm_processor/compile.sh b/tools/asm_processor/compile.sh index 786da404..feef7fc9 100644..100755 --- a/tools/asm_processor/compile.sh +++ b/tools/asm_processor/compile.sh @@ -1,19 +1,30 @@  #!/bin/bash  CC="$1" -shift -AS="$1" -shift +AS="$2" +OBJ="$3" +SRC="$4" -temp="$(mktemp)" -../tools/asm_processor/asm_processor.py "$2" --assembler "$AS" > "$temp.c" && -$CC -c "$temp.c" -o "$1" +PADDED_SRC="$(mktemp --suffix=.c padded-XXXXXX)" +PADDED_OBJ="$(mktemp --suffix=.o padded-XXXXXX)" -prelude=$(mktemp prelude.XXXXXX) -cat ../include/macros.inc >> "$prelude" -cat global.inc >> "$prelude" +# Create a .c file replacing the nonmatching function with volatile int writes,  +# and compile. +../tools/asm_processor/asm_processor.py "$SRC" --assembler "$AS" > "$PADDED_SRC" +$CC -c "$PADDED_SRC" -o "$PADDED_OBJ" -../tools/asm_processor/asm_processor.py "$2" --post-process "$1" --assembler "$AS" --asm-prelude "$prelude" -arm-none-eabi-objcopy --remove-section .comment "$1" "$1" -rm "$prelude" -rm "$temp" +PRELUDE=$(mktemp) +cat ../include/macros.inc >> "$PRELUDE" +cat global.inc >> "$PRELUDE" + +# Inject the matching assembly into the padded obj file. +../tools/asm_processor/asm_processor.py "$SRC" --post-process "$PADDED_OBJ" --assembler "$AS" --asm-prelude "$PRELUDE" + +arm-none-eabi-objcopy --remove-section .comment "$PADDED_OBJ" "$OBJ" + +rm "$PADDED_SRC" +rm "$PADDED_OBJ" +rm "$PRELUDE" +rm output.txt +rm asm_processor_temp.s +rm asm_processor_temp.o | 
