From e43f1d2e0d65ab4cb1feffcaab250060b769dbe0 Mon Sep 17 00:00:00 2001 From: Max Date: Wed, 23 Sep 2020 18:03:38 -0400 Subject: replaced patch_strtab with postprocess.py --- Makefile | 9 +- asm/NW4R/ut_TagProcessorBase.s | 32 +-- asm/text_10_3.s | 12 +- tools/patch_strtab/.gitignore | 2 - tools/patch_strtab/Makefile | 20 -- tools/patch_strtab/README.md | 13 -- tools/patch_strtab/patch_strtab.c | 423 -------------------------------------- tools/postprocess/postprocess.py | 317 ++++++++++++++++++++++++++++ 8 files changed, 343 insertions(+), 485 deletions(-) delete mode 100644 tools/patch_strtab/.gitignore delete mode 100644 tools/patch_strtab/Makefile delete mode 100644 tools/patch_strtab/README.md delete mode 100644 tools/patch_strtab/patch_strtab.c create mode 100644 tools/postprocess/postprocess.py diff --git a/Makefile b/Makefile index 12b2261..aa3ced1 100644 --- a/Makefile +++ b/Makefile @@ -60,12 +60,11 @@ OBJCOPY := $(DEVKITPPC)/bin/powerpc-eabi-objcopy CPP := cpp -P CC := $(WINE) tools/mwcc_compiler/$(MWCC_VERSION)/mwcceppc.exe LD := $(WINE) tools/mwcc_compiler/$(MWCC_VERSION)/mwldeppc.exe -PATCHSTRTAB := tools/patch_strtab/patch_strtab$(EXE) ELF2DOL := tools/elf2dol/elf2dol$(EXE) SHA1SUM := sha1sum PYTHON := python3 -#POSTPROC := tools/postprocess.py +POSTPROC := tools/postprocess/postprocess.py # Options INCLUDES := -i . -I- -i include -i include/SDK -i include/libstdc++ @@ -75,7 +74,7 @@ LDFLAGS := -map $(MAP) -fp hard -nodefaults CFLAGS := -Cpp_exceptions off -proc gekko -fp hard -O4,p -nodefaults -msgstyle gcc -ipa file $(INCLUDES) -W all # for postprocess.py -PROCFLAGS := -fprologue-fixup=old_stack +PROCFLAGS := -fsymbol-fixup # elf2dol needs to know these in order to calculate sbss correctly. SDATA_PDHR := 9 @@ -84,7 +83,7 @@ SBSS_PDHR := 10 infoshell = $(foreach line, $(shell $1 | sed "s/ /__SPACE__/g"), $(info $(subst __SPACE__, ,$(line)))) TOOLS_DIR = tools -TOOLDIRS = $(filter-out $(TOOLS_DIR)/mwcc_compiler,$(wildcard $(TOOLS_DIR)/*)) +TOOLDIRS = $(filter-out $(TOOLS_DIR)/mwcc_compiler $(TOOLS_DIR)/postprocess,$(wildcard $(TOOLS_DIR)/*)) TOOLBASE = $(TOOLDIRS:$(TOOLS_DIR)/%=%) TOOLS = $(foreach tool,$(TOOLBASE),$(TOOLS_DIR)/$(tool)/$(tool)$(EXE)) @@ -134,7 +133,7 @@ $(ELF): $(O_FILES) $(LDSCRIPT) $(BUILD_DIR)/%.o: %.s $(AS) $(ASFLAGS) -o $@ $< # resolve escape sequences for C++ mangled names in the .strtab section (assembler workaround). - $(PATCHSTRTAB) $@ + $(PYTHON) $(POSTPROC) $(PROCFLAGS) $@ -fsymbol-fixup $(BUILD_DIR)/%.o: %.cpp $(CC) $(CFLAGS) -lang c++ -c -o $@ $< diff --git a/asm/NW4R/ut_TagProcessorBase.s b/asm/NW4R/ut_TagProcessorBase.s index 3236a11..d1faeaa 100644 --- a/asm/NW4R/ut_TagProcessorBase.s +++ b/asm/NW4R/ut_TagProcessorBase.s @@ -2,15 +2,15 @@ .section .text, "ax" # 0x80006980 - 0x803E1E60 -.global __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv -__ct__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv: +.global __ct__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv +__ct__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv: /* 80362E28 0035EA88 3C 80 80 45 */ lis r4, lbl_8044C6DC@ha /* 80362E2C 0035EA8C 38 84 C6 DC */ addi r4, r4, lbl_8044C6DC@l /* 80362E30 0035EA90 90 83 00 00 */ stw r4, 0(r3) /* 80362E34 0035EA94 4E 80 00 20 */ blr -.global __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv -__dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv: +.global __dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv +__dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv: /* 80362E38 0035EA98 94 21 FF F0 */ stwu r1, -0x10(r1) /* 80362E3C 0035EA9C 7C 08 02 A6 */ mflr r0 /* 80362E40 0035EAA0 2C 03 00 00 */ cmpwi r3, 0 @@ -29,8 +29,8 @@ lbl_80362E60: /* 80362E70 0035EAD0 38 21 00 10 */ addi r1, r1, 0x10 /* 80362E74 0035EAD4 4E 80 00 20 */ blr -.global Process__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_ -Process__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_: +.global Process__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt +Process__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt: /* 80362E78 0035EAD8 94 21 FF B0 */ stwu r1, -0x50(r1) /* 80362E7C 0035EADC 7C 08 02 A6 */ mflr r0 /* 80362E80 0035EAE0 90 01 00 54 */ stw r0, 0x54(r1) @@ -115,8 +115,8 @@ lbl_80362F7C: /* 80362FA0 0035EC00 38 21 00 50 */ addi r1, r1, 0x50 /* 80362FA4 0035EC04 4E 80 00 20 */ blr -.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_ -CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_: +.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt +CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt: /* 80362FA8 0035EC08 94 21 FF A0 */ stwu r1, -0x60(r1) /* 80362FAC 0035EC0C 7C 08 02 A6 */ mflr r0 /* 80362FB0 0035EC10 90 01 00 64 */ stw r0, 0x64(r1) @@ -247,15 +247,15 @@ lbl_80363168: /* 80363188 0035EDE8 38 21 00 60 */ addi r1, r1, 0x60 /* 8036318C 0035EDEC 4E 80 00 20 */ blr -.global __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv -__ct__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv: +.global __ct__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv +__ct__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv: /* 80363190 0035EDF0 3C 80 80 45 */ lis r4, lbl_8044C6C8@ha /* 80363194 0035EDF4 38 84 C6 C8 */ addi r4, r4, lbl_8044C6C8@l /* 80363198 0035EDF8 90 83 00 00 */ stw r4, 0(r3) /* 8036319C 0035EDFC 4E 80 00 20 */ blr -.global __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv -__dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv: +.global __dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv +__dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv: /* 803631A0 0035EE00 94 21 FF F0 */ stwu r1, -0x10(r1) /* 803631A4 0035EE04 7C 08 02 A6 */ mflr r0 /* 803631A8 0035EE08 2C 03 00 00 */ cmpwi r3, 0 @@ -274,8 +274,8 @@ lbl_803631C8: /* 803631D8 0035EE38 38 21 00 10 */ addi r1, r1, 0x10 /* 803631DC 0035EE3C 4E 80 00 20 */ blr -.global Process__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_ -Process__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_: +.global Process__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt +Process__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt: /* 803631E0 0035EE40 94 21 FF B0 */ stwu r1, -0x50(r1) /* 803631E4 0035EE44 7C 08 02 A6 */ mflr r0 /* 803631E8 0035EE48 90 01 00 54 */ stw r0, 0x54(r1) @@ -360,8 +360,8 @@ lbl_803632E4: /* 80363308 0035EF68 38 21 00 50 */ addi r1, r1, 0x50 /* 8036330C 0035EF6C 4E 80 00 20 */ blr -.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_ -CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_: +.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt +CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt: /* 80363310 0035EF70 94 21 FF A0 */ stwu r1, -0x60(r1) /* 80363314 0035EF74 7C 08 02 A6 */ mflr r0 /* 80363318 0035EF78 90 01 00 64 */ stw r0, 0x64(r1) diff --git a/asm/text_10_3.s b/asm/text_10_3.s index ae9bf3c..5a07640 100644 --- a/asm/text_10_3.s +++ b/asm/text_10_3.s @@ -5104,10 +5104,10 @@ lbl_80367EFC: /* 80367F38 00363B98 7C 00 07 75 */ extsb. r0, r0 /* 80367F3C 00363B9C 40 82 00 2C */ bne lbl_80367F68 /* 80367F40 00363BA0 38 6D B1 C8 */ addi r3, r13, lbl_80640488-_SDA_BASE_ -/* 80367F44 00363BA4 4B FF AE E5 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv -/* 80367F48 00363BA8 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv@ha +/* 80367F44 00363BA4 4B FF AE E5 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv +/* 80367F48 00363BA8 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv@ha /* 80367F4C 00363BAC 3C A0 80 62 */ lis r5, lbl_80621728@ha -/* 80367F50 00363BB0 38 84 2E 38 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv@l +/* 80367F50 00363BB0 38 84 2E 38 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv@l /* 80367F54 00363BB4 38 6D B1 C8 */ addi r3, r13, lbl_80640488-_SDA_BASE_ /* 80367F58 00363BB8 38 A5 17 28 */ addi r5, r5, lbl_80621728@l /* 80367F5C 00363BBC 4B E5 EC ED */ bl __register_global_object_tmp @@ -5118,10 +5118,10 @@ lbl_80367F68: /* 80367F6C 00363BCC 7C 00 07 75 */ extsb. r0, r0 /* 80367F70 00363BD0 40 82 00 2C */ bne lbl_80367F9C /* 80367F74 00363BD4 38 6D B1 CC */ addi r3, r13, lbl_8064048C-_SDA_BASE_ -/* 80367F78 00363BD8 4B FF B2 19 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv -/* 80367F7C 00363BDC 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv@ha +/* 80367F78 00363BD8 4B FF B2 19 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv +/* 80367F7C 00363BDC 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv@ha /* 80367F80 00363BE0 3C A0 80 62 */ lis r5, lbl_80621734@ha -/* 80367F84 00363BE4 38 84 31 A0 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv@l +/* 80367F84 00363BE4 38 84 31 A0 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv@l /* 80367F88 00363BE8 38 6D B1 CC */ addi r3, r13, lbl_8064048C-_SDA_BASE_ /* 80367F8C 00363BEC 38 A5 17 34 */ addi r5, r5, lbl_80621734@l /* 80367F90 00363BF0 4B E5 EC B9 */ bl __register_global_object_tmp diff --git a/tools/patch_strtab/.gitignore b/tools/patch_strtab/.gitignore deleted file mode 100644 index 1ed8234..0000000 --- a/tools/patch_strtab/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -patch_strtab -*.exe diff --git a/tools/patch_strtab/Makefile b/tools/patch_strtab/Makefile deleted file mode 100644 index b76d8dd..0000000 --- a/tools/patch_strtab/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -CC := gcc -CFLAGS := -O3 -std=c99 -Wall - -ifeq ($(OS),Windows_NT) -EXE := .exe -else -EXE := -endif - -TARGET := patch_strtab$(EXE) - -.PHONY: all - -all: $(TARGET) - -clean: - rm -f patch_strtab patch_strtab.exe - -$(TARGET): patch_strtab.c - $(CC) $(CFLAGS) -o $@ $^ diff --git a/tools/patch_strtab/README.md b/tools/patch_strtab/README.md deleted file mode 100644 index 5cdca3f..0000000 --- a/tools/patch_strtab/README.md +++ /dev/null @@ -1,13 +0,0 @@ -README: - -This program patches the .strtab section of an ELF relocatable module -by replacing certain escape sequences with the characters they represent. - -CodeWarrior's C++ name mangling scheme may produce linkage names that -are not valid identifiers in the .s files provided to the -assembler, so we cannot use the mangled names directly as labels in the .s file. -Still, we would like to be able to link with compiled C++ modules and add accurate symbols to them -before they have been decompiled. To deal with this issue, for each .s file -we replace any illegal characters in the mangled name with valid escape sequences, -assemble the .s file, then use this program to postprocess the .o object code, which restores -the correct symbol name. diff --git a/tools/patch_strtab/patch_strtab.c b/tools/patch_strtab/patch_strtab.c deleted file mode 100644 index 5327dc7..0000000 --- a/tools/patch_strtab/patch_strtab.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * patch_strtab - * - * This program patches the .strtab section of an ELF relocatable module - * by replacing certain escape sequences with the characters they represent. - * - * by Max Parisi, 2020 - * - */ - -#include -#include -#include -#include -#include -#include "elf.h" -#include - -typedef uint64_t u64; -typedef int64_t s64; -typedef uint32_t u32; -typedef int32_t s32; -typedef uint16_t u16; -typedef int16_t s16; -typedef uint8_t u8; -typedef int8_t s8; - -typedef struct elfstruct -{ - s32 id; // +0x0, module ID - char *fileBuf; // +0x4, ptr to file contents buffer - Elf32_Ehdr *ehdr; // +0x8, ptr to ELF header - Elf32_Shdr *shStrTabShdr; // +0xC, ptr to shdr for .shstrtab - Elf32_Shdr *symTabShdr; // +0x10, ptr to shdr for .symtab - Elf32_Shdr *strTabShdr; // +0x14, ptr to shdr for .strtab - u32 *hashTable; // +0x18, ptr to symbol hash table -} ElfStruct; - -typedef struct escapemap -{ - const char *escapeSeq; - const char *replaceChar; -} EscapeMap; - -EscapeMap escapeMaps[] = -{ - { "____esc_lt_", "<" }, - { "____esc_gt_", ">" }, - { "____esc_cm_", "," }, - { "____esc_bs_", "\\" }, - { "____esc_ds_", "$" }, - { "____esc_at_", "@" } -}; - -// Thanks to jmucchiello at https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c -// You must free the result if result is non-NULL. -char *str_replace(const char *orig, const char *rep, const char *with) { - char *result; // the return string - const char *ins; // the next insert point - char *tmp; // varies - int len_rep; // length of rep (the string to remove) - int len_with; // length of with (the string to replace rep with) - int len_front; // distance between rep and end of last rep - int count; // number of replacements - - // sanity checks and initialization - if (!orig || !rep) - return NULL; - len_rep = strlen(rep); - if (len_rep == 0) - return NULL; // empty rep causes infinite loop during count - if (!with) - with = ""; - len_with = strlen(with); - - // count the number of replacements needed - ins = orig; - for (count = 0; (tmp = strstr(ins, rep)); ++count) { - ins = tmp + len_rep; - } - - tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1); - - if (!result) - return NULL; - - // first time through the loop, all the variable are set correctly - // from here on, - // tmp points to the end of the result string - // ins points to the next occurrence of rep in orig - // orig points to the remainder of orig after "end of rep" - while (count--) { - ins = strstr(orig, rep); - len_front = ins - orig; - tmp = strncpy(tmp, orig, len_front) + len_front; // copy the front, before replacement (or the substring between replacements) - tmp = strcpy(tmp, with) + len_with; // copy in the replacement at the correct position - orig += len_front + len_rep; // move to next "end of rep" - } - strcpy(tmp, orig); - return result; -} - -// switch endianness of 32-bit word -u32 Swap32(u32 word) -{ - return word >> 24 | - (word >> 8 & 0xff00) | - (word << 8 & 0xff0000) | - word << 24; -} - -u16 Swap16(u16 hword) -{ - return hword >> 8 | hword << 8; -} - -Elf32_Shdr *GetSection(const ElfStruct *elf, s32 shndx) -{ - return (Elf32_Shdr *)(elf->fileBuf + elf->ehdr->e_shoff + - elf->ehdr->e_shentsize * shndx); -} - -// If this ELF has a .shstrtab section, get the ELF section -// name at the specified offset into the section header string table -char *GetSectionName(const ElfStruct *elf, u32 offset) -{ - if (offset && elf->shStrTabShdr) { - return elf->fileBuf + elf->shStrTabShdr->sh_offset + offset; - } - return NULL; -} - -// If this ELF has a .strtab section, get the ELF symbol name -// at the specified offset into the string table -char *GetName(const ElfStruct *elf, u32 offset) -{ - if (offset && elf->strTabShdr) { - return elf->fileBuf + elf->strTabShdr->sh_offset + offset; - } - return NULL; -} - -// If this ELF has a .symtab section, get the ELF symbol at the -// specified index of the symbol table -Elf32_Sym *GetSymbol(const ElfStruct *elf, u32 symTabIndex) -{ - if (elf->symTabShdr) { - return (Elf32_Sym *)(elf->fileBuf + - elf->symTabShdr->sh_offset) + symTabIndex; - } - return NULL; -} - -u32 GetNumberOfSymbols(const ElfStruct *elf) -{ - return elf->symTabShdr->sh_size / sizeof(Elf32_Sym); -} - -// Write padBytes 0s to fp -void Padding(FILE *fp, u32 padBytes) -{ - const u8 pad = 0; - for (u32 i = 0; i < padBytes; i++) { - if (fwrite(&pad, sizeof(u8), 1, fp) != 1) { - fprintf(stderr, "ERROR: ins. disk space\n"); - exit(7); - } - } -} - -// Swap the endianness of every field of ehdr and return ehdr -Elf32_Ehdr *SwapEhdr(Elf32_Ehdr *ehdr) -{ - ehdr->e_type = Swap16(ehdr->e_type); - ehdr->e_machine = Swap16(ehdr->e_machine); - ehdr->e_version = Swap32(ehdr->e_version); - ehdr->e_entry = Swap32(ehdr->e_entry); - ehdr->e_phoff = Swap32(ehdr->e_phoff); - ehdr->e_shoff = Swap32(ehdr->e_shoff); - ehdr->e_flags = Swap32(ehdr->e_flags); - ehdr->e_ehsize = Swap16(ehdr->e_ehsize); - ehdr->e_phentsize = Swap16(ehdr->e_phentsize); - ehdr->e_phnum = Swap16(ehdr->e_phnum); - ehdr->e_shentsize = Swap16(ehdr->e_shentsize); - ehdr->e_shnum = Swap16(ehdr->e_shnum); - ehdr->e_shstrndx = Swap16(ehdr->e_shstrndx); - - return ehdr; -} - - -// Swap the endianness of every field in shdr and return shdr -Elf32_Shdr *SwapShdr(Elf32_Shdr *shdr) -{ - shdr->sh_name = Swap32(shdr->sh_name); - shdr->sh_type = Swap32(shdr->sh_type); - shdr->sh_flags = Swap32(shdr->sh_flags); - shdr->sh_addr = Swap32(shdr->sh_addr); - shdr->sh_offset = Swap32(shdr->sh_offset); - shdr->sh_size = Swap32(shdr->sh_size); - shdr->sh_link = Swap32(shdr->sh_link); - shdr->sh_info = Swap32(shdr->sh_info); - shdr->sh_addralign = Swap32(shdr->sh_addralign); - shdr->sh_entsize = Swap32(shdr->sh_entsize); - - return shdr; -} - -// Swap the endianness of all the 16-/32-bit fields of sym -// and return sym -Elf32_Sym *SwapSym(Elf32_Sym *sym) -{ - sym->st_name = Swap32(sym->st_name); - sym->st_value = Swap32(sym->st_value); - sym->st_size = Swap32(sym->st_size); - sym->st_shndx = Swap16(sym->st_shndx); - - return sym; -} - -// Swap the endianness of every symbol in the ELF's symbol table -void SwapSymbolTable(ElfStruct *elf) -{ - if (elf->symTabShdr) { - const u32 numSyms = elf->symTabShdr->sh_size / sizeof(Elf32_Sym); - for (u32 i = 0; i < numSyms; i++) { - Elf32_Sym *sym = GetSymbol(elf, i); - SwapSym(sym); - } - } -} - -// Load the entire contents of the file at path into a newly malloc'd -// buffer. Write the file's size into *fileSz and return a pointer -// to the buffer. Return NULL if file open, malloc, or file read fails. -char *LoadFile(char *path) -{ - u32 fileSz; - FILE *fp = fopen(path, "rb"); - if (!fp) { - fprintf(stderr, "ERROR: cannot open file '%s'\n", path); - return NULL; - } - - fseek(fp, 0, SEEK_END); - fileSz = ftell(fp); - rewind(fp); - - char *buf = malloc(fileSz); - if (!buf) { - fprintf(stderr, "ERROR: ins. memory\n"); - return NULL; - } - - if (fread(buf, fileSz, 1, fp) != 1) { - fprintf(stderr, "ERROR: cannot read file '%s'\n", path); - free(buf); - return NULL; - } - - fclose(fp); - return buf; -} - -// Load the ELF file located by path into a newly malloc'd -// ElfStruct, then return the pointer to this ElfStruct. -// The ELF header, section headers, and symbols will all have their endianness -// swapped. Record pointers to the .symtab, .strtab, -// and .shstrtab section headers in the ElfStruct if they exist -ElfStruct *LoadElfFile(char *path) -{ - ElfStruct *elf = malloc(sizeof(ElfStruct)); - if (!elf) { - fprintf(stderr, "LoadElfFile: ins. memory\n"); - return NULL; - } - memset(elf, 0, sizeof(ElfStruct)); - elf->fileBuf = LoadFile(path); - if (!elf->fileBuf) { - free(elf); - return NULL; - } - - if (memcmp(elf->fileBuf, "\177ELF", 4)) { - fprintf(stderr, "LoadElfFile: %s is not an ELF file\n", path); - free(elf); - return NULL; - } - - // Swap ELF file header - elf->ehdr = SwapEhdr((Elf32_Ehdr *)elf->fileBuf); - u32 i; - Elf32_Shdr *shdr; - char *sname; - // Swap each ELF section header - for (i = 0; i < elf->ehdr->e_shnum; i++) { - shdr = GetSection(elf, i); - SwapShdr(shdr); - } - - // Record .shstrtab header if it exists - if (elf->ehdr->e_shstrndx != SHN_UNDEF) { - elf->shStrTabShdr = GetSection(elf, elf->ehdr->e_shstrndx); - } - - // Search for .symtab and .strtab headers and record them if found - for (i = 0; i < elf->ehdr->e_shnum; i++) { - shdr = GetSection(elf, i); - sname = GetSectionName(elf, shdr->sh_name); - if (sname) { - if (!strcmp(sname, ".symtab")) { - elf->symTabShdr = shdr; - } else if (!strcmp(sname, ".strtab")) { - elf->strTabShdr = shdr; - } - } - } - - // Fix endianness in symbol table - SwapSymbolTable(elf); - - return elf; -} - -// perform all escape sequence replacements for the -// string in the .strtab referred to by strTabPtr, then -// pad the remaining space with NUL bytes -void ResolveEscapeSequences(char *strTabPtr, u32 symNameBufSz) -{ - // __ct__20Container____esc_lt_8MyStruct____esc_gt_FUi8MyStruct - // __ct__20Container<8MyStruct>FUi8MyStruct - - // buffers to hold intermediate strings for each transformation performed - char *firstReplaceStr = malloc(symNameBufSz); - strncpy(firstReplaceStr, strTabPtr, symNameBufSz); // copy original name - - char *secondReplaceStr = NULL; - - // replace each escape sequence - const u32 numEscSeqs = sizeof(escapeMaps) / sizeof(escapeMaps[0]); // always > 0 - u32 i; - for (i = 0; i < numEscSeqs; i++) { - if (i > 0) { - free(firstReplaceStr); - firstReplaceStr = secondReplaceStr; - secondReplaceStr = NULL; - } - secondReplaceStr = str_replace(firstReplaceStr, escapeMaps[i].escapeSeq, escapeMaps[i].replaceChar); - if (!secondReplaceStr) { - fprintf(stderr, "ERROR: malloc failed in str_replace\n"); - free(firstReplaceStr); - exit(EXIT_FAILURE); - } - } - free(firstReplaceStr); - - // we should never be making the strtab bigger - if (strlen(secondReplaceStr) > strlen(strTabPtr)) { - fprintf(stderr, "ERROR: the replacement symbol name %s is larger than the original name %s\n", secondReplaceStr, strTabPtr); - free(secondReplaceStr); - exit(EXIT_FAILURE); - } - - // write secondReplaceStr back to .strtab, padding any extra space with NUL - strncpy(strTabPtr, secondReplaceStr, symNameBufSz); -} - -int main(int argc, char *argv[]) -{ - if (argc != 2) { - fprintf(stderr, "usage: ./patch_strtab path/to/o_file.o\n"); - return EXIT_FAILURE; - } - ElfStruct *elf; - elf = LoadElfFile(argv[1]); - if (!elf) { - fprintf(stderr, "ERROR: failed to load '%s' as an ELF\n", argv[1]); - return EXIT_FAILURE; - } - u16 elfType = elf->ehdr->e_type; - if (elfType != ET_REL) { - fprintf(stderr, "ERROR: '%s' is not an ET_REL ELF\n", argv[1]); - free(elf); - return EXIT_FAILURE; - } - - const u32 numSyms = GetNumberOfSymbols(elf); - for (u32 i = 0; i < numSyms; i++) { - Elf32_Sym *sym = GetSymbol(elf, i); - if (!sym) { - fprintf(stderr, "ERROR: couldn't get symbol\n"); - return EXIT_FAILURE; - } - char *symName = GetName(elf, sym->st_name); // this is a pointer into elf at its .strtab section - u32 symNameBufSz; - if (symName) { - symNameBufSz = strlen(symName) + 1; - ResolveEscapeSequences(symName, symNameBufSz); - } - } - - // Write the patched .strtab section back to the file - FILE *fp = fopen(argv[1], "rb+"); - if (!fp) { - fprintf(stderr, "ERROR: failed to open '%s' to perform the patch\n", argv[1]); - free(elf); - return EXIT_FAILURE; - } - - const char *patchedStrTab = elf->fileBuf + elf->strTabShdr->sh_offset; - const u32 strTabSize = elf->strTabShdr->sh_size; - fseek(fp, elf->strTabShdr->sh_offset, SEEK_SET); - - if (fwrite(patchedStrTab, 1, strTabSize, fp) != strTabSize) { - fprintf(stderr, "ERROR: failed to fwrite the patched .strtab section to '%s'\n", argv[1]); - free(elf); - fclose(fp); - return EXIT_FAILURE; - } - free(elf); - fclose(fp); - return EXIT_SUCCESS; -} diff --git a/tools/postprocess/postprocess.py b/tools/postprocess/postprocess.py new file mode 100644 index 0000000..ab2d912 --- /dev/null +++ b/tools/postprocess/postprocess.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 + +BANNER = """ +# This script is the culmination of three patches supporting decompilation +# with the CodeWarrior compiler. +# - riidefi, 2020 +# +# postprocess.py [args] file +# +# 1) Certain versions have a bug where the ctor alignment is ignored and set incorrectly. +# This option is enabled with -fctor-realign, and disabled by default with -fno-ctor-realign +# +# 2) Certain C++ symbols cannot be assembled normally. +# To support the buildsystem, a simple substitution system has been devised +# +# ? -> CHAR +# +# IDs (all irregular symbols in mangled names): +# 0: < +# 1: > +# 2: @ +# 3: \\ +# 4: , +# 5: - +# +# This option is enabled with -fsymbol-fixup, and disabled by default with -fno-symbol-fixup +# +# 3) CodeWarrior versions below 2.3 used a different scheduler model. +# The script can currently adjust function epilogues with the old_stack option. +# -fprologue-fixup=[default=none, none, old_stack] +""" + +import struct + +# Substitutions +substitutions = ( + ('<', '____esc_lt'), + ('>', '____esc_gt'), + ('@', '____esc_at'), + ('\\', '____esc_bs'), + (',', '____esc_cm'), + ('-', '____esc_hy') +) + +def format(symbol): + for sub in substitutions: + symbol = symbol.replace(sub[0], sub[1]) + + return symbol + +def decodeformat(symbol): + for sub in substitutions: + symbol = symbol.replace(sub[1], sub[0]) + + return symbol + +# Stream utilities + +def read_u8(f): + return struct.unpack("B", f.read(1))[0] + +def read_u32(f): + return struct.unpack(">I", f.read(4))[0] + +def read_u16(f): + return struct.unpack(">H", f.read(2))[0] + +def write_u32(f, val): + f.write(struct.pack(">I", val)) + +class ToReplace: + def __init__(self, position, dest, src_size): + self.position = position # Where in file + self.dest = dest # String to patch + self.src_size = src_size # Pad rest with zeroes + + # print("To replace: %s %s %s" % (self.position, self.dest, self.src_size)) + +def read_string(f): + tmp = "" + c = 0xff + while c != 0x00: + c = read_u8(f) + if c != 0: + tmp += chr(c) + return tmp + +def ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg): + patch_align_ofs = [] + + for i in range(nSecHeader): + f.seek(ofsSecHeader + i * 0x28) + ofsname = read_u32(f) + if not ofsname: continue + + back = f.tell() + + f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10) + ofsShST = read_u32(f) + f.seek(ofsShST + ofsname) + name = read_string(f) + if name == ".ctors" or name == ".dtors": + patch_align_ofs.append(ofsSecHeader + i * 0x28 + 0x20) + + f.seek(back) + + return patch_align_ofs + +SHT_PROGBITS = 1 +SHT_STRTAB = 3 + +def impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup): + result = [] + + f.seek(0x20) + ofsSecHeader = read_u32(f) + f.seek(0x30) + nSecHeader = read_u16(f) + idxSegNameSeg = read_u16(f) + secF = False # First instance the section names + + # Header: 0x32: + patch_align_ofs = [] + + if do_ctor_realign: + patch_align_ofs = ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg) + + for i in range(nSecHeader): + f.seek(ofsSecHeader + i * 0x28) + sh_name = read_u32(f) + sh_type = read_u32(f) + + if sh_type == SHT_STRTAB and do_symbol_fixup: + if not secF: + secF = True + f.seek(ofsSecHeader + i * 0x28 + 0x10) + ofs = read_u32(f) + size = read_u32(f) + + f.seek(ofs) + string = "" + str_spos = ofs + for i in range(ofs, ofs+size): + c = read_u8(f) + if c == 0: + if len(string): + fixed = decodeformat(string) + if fixed != string: + result.append(ToReplace(str_spos, fixed, len(string))) + string = "" + str_spos = i+1 + else: + string += chr(c) + else: + f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10) + ofsShST = read_u32(f) + f.seek(ofsShST + sh_name) + name = read_string(f) + + if name == ".text" and do_old_stack: + f.seek(ofsSecHeader + i * 0x28 + 0x10) + ofs = read_u32(f) + size = read_u32(f) + + # We assume + # 1) Only instructions are in the .text section + # 2) These instructions are 4-byte aligned + assert ofs != 0 + assert ofs % 4 == 0 + assert size % 4 == 0 + + f.seek(ofs) + + mtlr_pos = 0 + + # (mtlr position, blr position) + epilogues = [] + + for _ in range(ofs, ofs+size, 4): + it = f.tell() + instr = read_u32(f) + + # Skip padding + if instr == 0: continue + + # Call analysis is not actually required + # No mtlr will exist without a blr; mtctr/bctr* is used for dynamic dispatch + + # FUN_A: + # li r3, 0 + # blr <---- No mtlr, move onto the next function + # FUN_B: + # ; complex function, stack manip + # mtlr r0 <---- Expect a blr + # addi r1, r1, 24 + # blr <---- Confirm patch above + + # mtlr alias for mtspr + if instr == 0x7C0803A6: + assert mtlr_pos == 0 + mtlr_pos = it + # blr + elif instr == 0x4E800020: + if mtlr_pos: + epilogues.append((mtlr_pos, it)) + mtlr_pos = 0 + + + # Check for a lone mtlr + assert mtlr_pos == 0 + + # Reunify mtlr/blr instructions, shifting intermediary instructions up + for mtlr_pos, blr_pos in epilogues: + # Check if we need to do anything + if mtlr_pos + 4 == blr_pos: continue + + # As the processor can only hold 6 instructions at once in the pipeline, + # it's unlikely for the mtlr be shifted up more instructions than that--usually, + # only one: + # mtlr r0 + # addi r1, r1, 24 + # blr + assert blr_pos - 4 > mtlr_pos + assert blr_pos - mtlr_pos <= 6 * 4 + + print("Patching old epilogue: %s %s" % (mtlr_pos, blr_pos)) + + f.seek(mtlr_pos) + mtlr = read_u32(f) + + for it in range(mtlr_pos, blr_pos - 4, 4): + f.seek(it + 4) + next_instr = read_u32(f) + f.seek(it) + write_u32(f, next_instr) + + f.seek(blr_pos - 4) + write_u32(f, mtlr) + + return (result, patch_align_ofs) + +def postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup): + patches = impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup) + + f.seek(0) + source_bytes = list(f.read()) + for patch in patches[0]: + assert len(patch.dest) <= patch.src_size + for j in range(patch.src_size): + if j >= len(patch.dest): + c = 0 + else: + c = ord(patch.dest[j]) + source_bytes[patch.position + j] = c + + # Patch ctor align + nP = 0 + for p in patches[1]: + print("Patching ctors") + source_bytes[p + 0] = 0 + source_bytes[p + 1] = 0 + source_bytes[p + 2] = 0 + source_bytes[p + 3] = 4 + nP += 1 + if nP > 1: + print("Patched ctors + dtors") + + f.seek(0) + f.write(bytes(source_bytes)) + +def frontend(args): + inplace = "" + do_ctor_realign = False + do_old_stack = False + do_symbol_fixup = False + + for arg in args: + if arg.startswith('-f'): + negated = False + if arg.startswith('-fno-'): + negated = True + arg = arg[len('-fno-'):] + else: + arg = arg[len('-f'):] + + if arg == 'ctor_realign': + do_ctor_realign = not negated + elif arg == 'symbol-fixup': + do_symbol_fixup = not negated + elif arg.startswith('prologue-fixup='): + do_old_stack = arg[len('prologue-fixup='):] == 'old_stack' + else: + print("Unknown argument: %s" % arg) + elif arg.startswith('-'): + print("Unknown argument: %s. Perhaps you meant -f%s?" % (arg, arg)) + else: + if inplace: + print("Cannot process %s. Only one source file may be specified." % arg) + else: + inplace = arg + + if not inplace: + print("A file must be specified!") + return + + try: + postprocess_elf(open(inplace, 'rb+'), do_ctor_realign, do_old_stack, do_symbol_fixup) + except FileNotFoundError: + print("Cannot open file %s" % inplace) + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print(BANNER) + else: + frontend(sys.argv[1:]) -- cgit v1.2.3