diff options
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | asm/NW4R/ut_TagProcessorBase.s | 32 | ||||
-rw-r--r-- | asm/text_10_3.s | 12 | ||||
-rw-r--r-- | tools/patch_strtab/.gitignore | 2 | ||||
-rw-r--r-- | tools/patch_strtab/Makefile | 20 | ||||
-rw-r--r-- | tools/patch_strtab/README.md | 13 | ||||
-rw-r--r-- | tools/patch_strtab/patch_strtab.c | 423 | ||||
-rw-r--r-- | tools/postprocess/postprocess.py | 317 |
8 files changed, 343 insertions, 485 deletions
@@ -60,12 +60,11 @@ OBJCOPY := $(DEVKITPPC)/bin/powerpc-eabi-objcopy CPP := cpp -P CC := $(WINE) tools/mwcc_compiler/$(MWCC_VERSION)/mwcceppc.exe LD := $(WINE) tools/mwcc_compiler/$(MWCC_VERSION)/mwldeppc.exe -PATCHSTRTAB := tools/patch_strtab/patch_strtab$(EXE) ELF2DOL := tools/elf2dol/elf2dol$(EXE) SHA1SUM := sha1sum PYTHON := python3 -#POSTPROC := tools/postprocess.py +POSTPROC := tools/postprocess/postprocess.py # Options INCLUDES := -i . -I- -i include -i include/SDK -i include/libstdc++ @@ -75,7 +74,7 @@ LDFLAGS := -map $(MAP) -fp hard -nodefaults CFLAGS := -Cpp_exceptions off -proc gekko -fp hard -O4,p -nodefaults -msgstyle gcc -ipa file $(INCLUDES) -W all # for postprocess.py -PROCFLAGS := -fprologue-fixup=old_stack +PROCFLAGS := -fsymbol-fixup # elf2dol needs to know these in order to calculate sbss correctly. SDATA_PDHR := 9 @@ -84,7 +83,7 @@ SBSS_PDHR := 10 infoshell = $(foreach line, $(shell $1 | sed "s/ /__SPACE__/g"), $(info $(subst __SPACE__, ,$(line)))) TOOLS_DIR = tools -TOOLDIRS = $(filter-out $(TOOLS_DIR)/mwcc_compiler,$(wildcard $(TOOLS_DIR)/*)) +TOOLDIRS = $(filter-out $(TOOLS_DIR)/mwcc_compiler $(TOOLS_DIR)/postprocess,$(wildcard $(TOOLS_DIR)/*)) TOOLBASE = $(TOOLDIRS:$(TOOLS_DIR)/%=%) TOOLS = $(foreach tool,$(TOOLBASE),$(TOOLS_DIR)/$(tool)/$(tool)$(EXE)) @@ -134,7 +133,7 @@ $(ELF): $(O_FILES) $(LDSCRIPT) $(BUILD_DIR)/%.o: %.s $(AS) $(ASFLAGS) -o $@ $< # resolve escape sequences for C++ mangled names in the .strtab section (assembler workaround). - $(PATCHSTRTAB) $@ + $(PYTHON) $(POSTPROC) $(PROCFLAGS) $@ -fsymbol-fixup $(BUILD_DIR)/%.o: %.cpp $(CC) $(CFLAGS) -lang c++ -c -o $@ $< diff --git a/asm/NW4R/ut_TagProcessorBase.s b/asm/NW4R/ut_TagProcessorBase.s index 3236a11..d1faeaa 100644 --- a/asm/NW4R/ut_TagProcessorBase.s +++ b/asm/NW4R/ut_TagProcessorBase.s @@ -2,15 +2,15 @@ .section .text, "ax" # 0x80006980 - 0x803E1E60
-.global __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv
-__ct__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv:
+.global __ct__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv
+__ct__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv:
/* 80362E28 0035EA88 3C 80 80 45 */ lis r4, lbl_8044C6DC@ha
/* 80362E2C 0035EA8C 38 84 C6 DC */ addi r4, r4, lbl_8044C6DC@l
/* 80362E30 0035EA90 90 83 00 00 */ stw r4, 0(r3)
/* 80362E34 0035EA94 4E 80 00 20 */ blr
-.global __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv
-__dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv:
+.global __dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv
+__dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv:
/* 80362E38 0035EA98 94 21 FF F0 */ stwu r1, -0x10(r1)
/* 80362E3C 0035EA9C 7C 08 02 A6 */ mflr r0
/* 80362E40 0035EAA0 2C 03 00 00 */ cmpwi r3, 0
@@ -29,8 +29,8 @@ lbl_80362E60: /* 80362E70 0035EAD0 38 21 00 10 */ addi r1, r1, 0x10
/* 80362E74 0035EAD4 4E 80 00 20 */ blr
-.global Process__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_
-Process__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_:
+.global Process__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt
+Process__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt:
/* 80362E78 0035EAD8 94 21 FF B0 */ stwu r1, -0x50(r1)
/* 80362E7C 0035EADC 7C 08 02 A6 */ mflr r0
/* 80362E80 0035EAE0 90 01 00 54 */ stw r0, 0x54(r1)
@@ -115,8 +115,8 @@ lbl_80362F7C: /* 80362FA0 0035EC00 38 21 00 50 */ addi r1, r1, 0x50
/* 80362FA4 0035EC04 4E 80 00 20 */ blr
-.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_
-CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_c____esc_gt_:
+.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt
+CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltc____esc_gt:
/* 80362FA8 0035EC08 94 21 FF A0 */ stwu r1, -0x60(r1)
/* 80362FAC 0035EC0C 7C 08 02 A6 */ mflr r0
/* 80362FB0 0035EC10 90 01 00 64 */ stw r0, 0x64(r1)
@@ -247,15 +247,15 @@ lbl_80363168: /* 80363188 0035EDE8 38 21 00 60 */ addi r1, r1, 0x60
/* 8036318C 0035EDEC 4E 80 00 20 */ blr
-.global __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv
-__ct__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv:
+.global __ct__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv
+__ct__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv:
/* 80363190 0035EDF0 3C 80 80 45 */ lis r4, lbl_8044C6C8@ha
/* 80363194 0035EDF4 38 84 C6 C8 */ addi r4, r4, lbl_8044C6C8@l
/* 80363198 0035EDF8 90 83 00 00 */ stw r4, 0(r3)
/* 8036319C 0035EDFC 4E 80 00 20 */ blr
-.global __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv
-__dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv:
+.global __dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv
+__dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv:
/* 803631A0 0035EE00 94 21 FF F0 */ stwu r1, -0x10(r1)
/* 803631A4 0035EE04 7C 08 02 A6 */ mflr r0
/* 803631A8 0035EE08 2C 03 00 00 */ cmpwi r3, 0
@@ -274,8 +274,8 @@ lbl_803631C8: /* 803631D8 0035EE38 38 21 00 10 */ addi r1, r1, 0x10
/* 803631DC 0035EE3C 4E 80 00 20 */ blr
-.global Process__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_
-Process__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_:
+.global Process__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt
+Process__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt:
/* 803631E0 0035EE40 94 21 FF B0 */ stwu r1, -0x50(r1)
/* 803631E4 0035EE44 7C 08 02 A6 */ mflr r0
/* 803631E8 0035EE48 90 01 00 54 */ stw r0, 0x54(r1)
@@ -360,8 +360,8 @@ lbl_803632E4: /* 80363308 0035EF68 38 21 00 50 */ addi r1, r1, 0x50
/* 8036330C 0035EF6C 4E 80 00 20 */ blr
-.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_
-CalcRect__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_FPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_lt_w____esc_gt_:
+.global CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt
+CalcRect__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFPQ34nw4r2ut4RectUsPQ34nw4r2ut15PrintContext____esc_ltw____esc_gt:
/* 80363310 0035EF70 94 21 FF A0 */ stwu r1, -0x60(r1)
/* 80363314 0035EF74 7C 08 02 A6 */ mflr r0
/* 80363318 0035EF78 90 01 00 64 */ stw r0, 0x64(r1)
diff --git a/asm/text_10_3.s b/asm/text_10_3.s index ae9bf3c..5a07640 100644 --- a/asm/text_10_3.s +++ b/asm/text_10_3.s @@ -5104,10 +5104,10 @@ lbl_80367EFC: /* 80367F38 00363B98 7C 00 07 75 */ extsb. r0, r0
/* 80367F3C 00363B9C 40 82 00 2C */ bne lbl_80367F68
/* 80367F40 00363BA0 38 6D B1 C8 */ addi r3, r13, lbl_80640488-_SDA_BASE_
-/* 80367F44 00363BA4 4B FF AE E5 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv
-/* 80367F48 00363BA8 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv@ha
+/* 80367F44 00363BA4 4B FF AE E5 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv
+/* 80367F48 00363BA8 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv@ha
/* 80367F4C 00363BAC 3C A0 80 62 */ lis r5, lbl_80621728@ha
-/* 80367F50 00363BB0 38 84 2E 38 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_c____esc_gt_Fv@l
+/* 80367F50 00363BB0 38 84 2E 38 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltc____esc_gtFv@l
/* 80367F54 00363BB4 38 6D B1 C8 */ addi r3, r13, lbl_80640488-_SDA_BASE_
/* 80367F58 00363BB8 38 A5 17 28 */ addi r5, r5, lbl_80621728@l
/* 80367F5C 00363BBC 4B E5 EC ED */ bl __register_global_object_tmp
@@ -5118,10 +5118,10 @@ lbl_80367F68: /* 80367F6C 00363BCC 7C 00 07 75 */ extsb. r0, r0
/* 80367F70 00363BD0 40 82 00 2C */ bne lbl_80367F9C
/* 80367F74 00363BD4 38 6D B1 CC */ addi r3, r13, lbl_8064048C-_SDA_BASE_
-/* 80367F78 00363BD8 4B FF B2 19 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv
-/* 80367F7C 00363BDC 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv@ha
+/* 80367F78 00363BD8 4B FF B2 19 */ bl __ct__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv
+/* 80367F7C 00363BDC 3C 80 80 36 */ lis r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv@ha
/* 80367F80 00363BE0 3C A0 80 62 */ lis r5, lbl_80621734@ha
-/* 80367F84 00363BE4 38 84 31 A0 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_lt_w____esc_gt_Fv@l
+/* 80367F84 00363BE4 38 84 31 A0 */ addi r4, r4, __dt__Q34nw4r2ut19TagProcessorBase____esc_ltw____esc_gtFv@l
/* 80367F88 00363BE8 38 6D B1 CC */ addi r3, r13, lbl_8064048C-_SDA_BASE_
/* 80367F8C 00363BEC 38 A5 17 34 */ addi r5, r5, lbl_80621734@l
/* 80367F90 00363BF0 4B E5 EC B9 */ bl __register_global_object_tmp
diff --git a/tools/patch_strtab/.gitignore b/tools/patch_strtab/.gitignore deleted file mode 100644 index 1ed8234..0000000 --- a/tools/patch_strtab/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -patch_strtab
-*.exe
diff --git a/tools/patch_strtab/Makefile b/tools/patch_strtab/Makefile deleted file mode 100644 index b76d8dd..0000000 --- a/tools/patch_strtab/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -CC := gcc
-CFLAGS := -O3 -std=c99 -Wall
-
-ifeq ($(OS),Windows_NT)
-EXE := .exe
-else
-EXE :=
-endif
-
-TARGET := patch_strtab$(EXE)
-
-# Both goals are commands, not files; declaring them phony keeps a stray file
-# named "all" or "clean" from suppressing the recipe.
-.PHONY: all clean
-
-all: $(TARGET)
-
-# Remove both the native and the Windows executable names.
-clean:
-	$(RM) patch_strtab patch_strtab.exe
-
-$(TARGET): patch_strtab.c
-	$(CC) $(CFLAGS) -o $@ $^
diff --git a/tools/patch_strtab/README.md b/tools/patch_strtab/README.md deleted file mode 100644 index 5cdca3f..0000000 --- a/tools/patch_strtab/README.md +++ /dev/null @@ -1,13 +0,0 @@ -README:
-
-This program patches the .strtab section of an ELF relocatable module
-by replacing certain escape sequences with the characters they represent.
-
-CodeWarrior's C++ name mangling scheme may produce linkage names that
-are not valid identifiers in the .s files provided to the
-assembler, so we cannot use the mangled names directly as labels in the .s file.
-Still, we would like to be able to link with compiled C++ modules and add accurate symbols to them
-before they have been decompiled. To deal with this issue, for each .s file
-we replace any illegal characters in the mangled name with valid escape sequences,
-assemble the .s file, then use this program to postprocess the .o object code, which restores
-the correct symbol name.
diff --git a/tools/patch_strtab/patch_strtab.c b/tools/patch_strtab/patch_strtab.c deleted file mode 100644 index 5327dc7..0000000 --- a/tools/patch_strtab/patch_strtab.c +++ /dev/null @@ -1,423 +0,0 @@ -/*
- * patch_strtab
- *
- * This program patches the .strtab section of an ELF relocatable module
- * by replacing certain escape sequences with the characters they represent.
- *
- * by Max Parisi, 2020
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-#include <inttypes.h>
-#include "elf.h"
-#include <ctype.h>
-
-// Fixed-width integer shorthands used throughout this file
-// (u = unsigned, s = signed, followed by the width in bits).
-typedef uint64_t u64;
-typedef int64_t s64;
-typedef uint32_t u32;
-typedef int32_t s32;
-typedef uint16_t u16;
-typedef int16_t s16;
-typedef uint8_t u8;
-typedef int8_t s8;
-
-// In-memory view of a loaded ELF file: the raw file buffer plus pointers into
-// that buffer for the headers this tool needs.
-// LoadElfFile zero-fills the struct, then sets fileBuf, ehdr and whichever
-// section-header pointers it finds; id and hashTable are never assigned in
-// this file and therefore stay zero/NULL.
-// NOTE(review): the "+0x.." offsets in the field comments look like they
-// assume 4-byte pointers -- confirm before relying on them.
-typedef struct elfstruct
-{
- s32 id; // +0x0, module ID
- char *fileBuf; // +0x4, ptr to file contents buffer
- Elf32_Ehdr *ehdr; // +0x8, ptr to ELF header
- Elf32_Shdr *shStrTabShdr; // +0xC, ptr to shdr for .shstrtab
- Elf32_Shdr *symTabShdr; // +0x10, ptr to shdr for .symtab
- Elf32_Shdr *strTabShdr; // +0x14, ptr to shdr for .strtab
- u32 *hashTable; // +0x18, ptr to symbol hash table
-} ElfStruct;
-
-// One substitution rule: an escape sequence as it appears in an assembled
-// symbol name, and the text that replaces it.
-typedef struct escapemap
-{
- const char *escapeSeq; // text to search for in a symbol name
- const char *replaceChar; // NUL-terminated replacement text
-} EscapeMap;
-
-// Substitution table applied, in array order, by ResolveEscapeSequences.
-// Every replacement is shorter than its escape sequence, so a patched name
-// always fits in the space the original name occupied in .strtab.
-EscapeMap escapeMaps[] =
-{
- { "____esc_lt_", "<" },
- { "____esc_gt_", ">" },
- { "____esc_cm_", "," },
- { "____esc_bs_", "\\" },
- { "____esc_ds_", "$" },
- { "____esc_at_", "@" }
-};
-
-// Thanks to jmucchiello at https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c
-// You must free the result if result is non-NULL.
-//
-// Return a newly malloc'd copy of orig in which every occurrence of rep has
-// been replaced by with.
-//   orig - string to search; NULL yields NULL
-//   rep  - substring to remove; NULL or "" yields NULL
-//   with - replacement text; NULL is treated as ""
-// Also returns NULL when malloc fails, so a NULL result does not by itself
-// distinguish bad arguments from an allocation failure.
-char *str_replace(const char *orig, const char *rep, const char *with) {
- char *result; // the return string
- const char *ins; // the next insert point
- char *tmp; // varies
- int len_rep; // length of rep (the string to remove)
- int len_with; // length of with (the string to replace rep with)
- int len_front; // distance between rep and end of last rep
- int count; // number of replacements
-
- // sanity checks and initialization
- if (!orig || !rep)
- return NULL;
- len_rep = strlen(rep);
- if (len_rep == 0)
- return NULL; // empty rep causes infinite loop during count
- if (!with)
- with = "";
- len_with = strlen(with);
-
- // count the number of replacements needed
- ins = orig;
- for (count = 0; (tmp = strstr(ins, rep)); ++count) {
- ins = tmp + len_rep;
- }
-
- // size the result exactly: each replacement changes the length by
- // (len_with - len_rep), plus one byte for the terminating NUL
- tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
-
- if (!result)
- return NULL;
-
- // first time through the loop, all the variable are set correctly
- // from here on,
- // tmp points to the end of the result string
- // ins points to the next occurrence of rep in orig
- // orig points to the remainder of orig after "end of rep"
- while (count--) {
- ins = strstr(orig, rep);
- len_front = ins - orig;
- tmp = strncpy(tmp, orig, len_front) + len_front; // copy the front, before replacement (or the substring between replacements)
- tmp = strcpy(tmp, with) + len_with; // copy in the replacement at the correct position
- orig += len_front + len_rep; // move to next "end of rep"
- }
- // copy whatever follows the last occurrence, including the NUL
- strcpy(tmp, orig);
- return result;
-}
-
-// Reverse the byte order of a 32-bit value.
-u32 Swap32(u32 word)
-{
-    const u32 top_to_bottom = word >> 24;
-    const u32 upper_mid_down = (word >> 8) & 0x0000ff00;
-    const u32 lower_mid_up = (word << 8) & 0x00ff0000;
-    const u32 bottom_to_top = word << 24;
-
-    return top_to_bottom | upper_mid_down | lower_mid_up | bottom_to_top;
-}
-
-// Reverse the byte order of a 16-bit value.
-u16 Swap16(u16 hword)
-{
-    const u16 high_to_low = hword >> 8;
-    const u16 low_to_high = hword << 8;
-
-    return high_to_low | low_to_high;
-}
-
-// Return a pointer to section header number shndx inside the loaded file
-// buffer, located via e_shoff and e_shentsize from the ELF header.
-// No bounds check is made against e_shnum.
-Elf32_Shdr *GetSection(const ElfStruct *elf, s32 shndx)
-{
-    const Elf32_Ehdr *hdr = elf->ehdr;
-    char *entry = elf->fileBuf + hdr->e_shoff + hdr->e_shentsize * shndx;
-
-    return (Elf32_Shdr *)entry;
-}
-
-// Look up a section name in the section header string table.
-// Returns a pointer into the file buffer at the given offset of .shstrtab,
-// or NULL when offset is 0 or no .shstrtab header has been recorded.
-char *GetSectionName(const ElfStruct *elf, u32 offset)
-{
-    if (!offset || !elf->shStrTabShdr)
-        return NULL;
-
-    return elf->fileBuf + elf->shStrTabShdr->sh_offset + offset;
-}
-
-// Look up a symbol name in the string table.
-// Returns a pointer into the file buffer at the given offset of .strtab,
-// or NULL when offset is 0 or no .strtab header has been recorded.
-char *GetName(const ElfStruct *elf, u32 offset)
-{
-    if (!offset || !elf->strTabShdr)
-        return NULL;
-
-    return elf->fileBuf + elf->strTabShdr->sh_offset + offset;
-}
-
-// Return a pointer to entry symTabIndex of the symbol table inside the file
-// buffer, or NULL when no .symtab header has been recorded.
-// The index is not checked against the number of symbols.
-Elf32_Sym *GetSymbol(const ElfStruct *elf, u32 symTabIndex)
-{
-    if (!elf->symTabShdr)
-        return NULL;
-
-    Elf32_Sym *table = (Elf32_Sym *)(elf->fileBuf + elf->symTabShdr->sh_offset);
-    return table + symTabIndex;
-}
-
-// Return the number of entries in the symbol table, computed as the .symtab
-// section size divided by sizeof(Elf32_Sym).
-// Returns 0 when no .symtab header has been recorded, matching the NULL
-// guard used by GetSymbol, instead of dereferencing a NULL pointer.
-u32 GetNumberOfSymbols(const ElfStruct *elf)
-{
-    if (!elf->symTabShdr)
-        return 0;
-
-    return elf->symTabShdr->sh_size / sizeof(Elf32_Sym);
-}
-
-// Append padBytes zero bytes to fp, one byte per fwrite call.
-// On the first failed write, prints an error to stderr and terminates the
-// process with exit status 7.
-void Padding(FILE *fp, u32 padBytes)
-{
-    const u8 zero = 0;
-    u32 remaining = padBytes;
-
-    while (remaining > 0) {
-        const size_t written = fwrite(&zero, sizeof(u8), 1, fp);
-        if (written != 1) {
-            fprintf(stderr, "ERROR: ins. disk space\n");
-            exit(7);
-        }
-        remaining--;
-    }
-}
-
-// Reverse the byte order, in place, of every 16- and 32-bit field of the ELF
-// file header listed below (e_ident is not touched), then hand back the same
-// pointer so the call can be used as an expression.
-Elf32_Ehdr *SwapEhdr(Elf32_Ehdr *ehdr)
-{
-    // 32-bit fields
-    ehdr->e_version = Swap32(ehdr->e_version);
-    ehdr->e_entry   = Swap32(ehdr->e_entry);
-    ehdr->e_phoff   = Swap32(ehdr->e_phoff);
-    ehdr->e_shoff   = Swap32(ehdr->e_shoff);
-    ehdr->e_flags   = Swap32(ehdr->e_flags);
-
-    // 16-bit fields
-    ehdr->e_type      = Swap16(ehdr->e_type);
-    ehdr->e_machine   = Swap16(ehdr->e_machine);
-    ehdr->e_ehsize    = Swap16(ehdr->e_ehsize);
-    ehdr->e_phentsize = Swap16(ehdr->e_phentsize);
-    ehdr->e_phnum     = Swap16(ehdr->e_phnum);
-    ehdr->e_shentsize = Swap16(ehdr->e_shentsize);
-    ehdr->e_shnum     = Swap16(ehdr->e_shnum);
-    ehdr->e_shstrndx  = Swap16(ehdr->e_shstrndx);
-
-    return ehdr;
-}
-
-
-// Swap the endianness of every field in shdr and return shdr
-Elf32_Shdr *SwapShdr(Elf32_Shdr *shdr)
-{
- shdr->sh_name = Swap32(shdr->sh_name);
- shdr->sh_type = Swap32(shdr->sh_type);
- shdr->sh_flags = Swap32(shdr->sh_flags);
- shdr->sh_addr = Swap32(shdr->sh_addr);
- shdr->sh_offset = Swap32(shdr->sh_offset);
- shdr->sh_size = Swap32(shdr->sh_size);
- shdr->sh_link = Swap32(shdr->sh_link);
- shdr->sh_info = Swap32(shdr->sh_info);
- shdr->sh_addralign = Swap32(shdr->sh_addralign);
- shdr->sh_entsize = Swap32(shdr->sh_entsize);
-
- return shdr;
-}
-
-// Reverse the byte order, in place, of the 16- and 32-bit fields of a symbol
-// table entry (st_name, st_value, st_size, st_shndx); the remaining fields
-// are left as they are. Returns the same pointer.
-Elf32_Sym *SwapSym(Elf32_Sym *sym)
-{
-    // 16-bit field
-    sym->st_shndx = Swap16(sym->st_shndx);
-
-    // 32-bit fields
-    sym->st_size  = Swap32(sym->st_size);
-    sym->st_value = Swap32(sym->st_value);
-    sym->st_name  = Swap32(sym->st_name);
-
-    return sym;
-}
-
-// Apply SwapSym to every entry of the ELF's symbol table.
-// Does nothing when no .symtab header has been recorded.
-void SwapSymbolTable(ElfStruct *elf)
-{
-    if (!elf->symTabShdr)
-        return;
-
-    const u32 count = elf->symTabShdr->sh_size / sizeof(Elf32_Sym);
-    u32 index = 0;
-    while (index < count) {
-        SwapSym(GetSymbol(elf, index));
-        index++;
-    }
-}
-
-// Load the entire contents of the file at path into a newly malloc'd buffer
-// and return a pointer to it; the caller owns the buffer and must free() it.
-// The file size is not reported to the caller.
-// Returns NULL, after printing a message to stderr, if the file cannot be
-// opened, its size cannot be determined, malloc fails, or the read fails.
-// The stream is closed on every path.
-char *LoadFile(char *path)
-{
-    FILE *fp = fopen(path, "rb");
-    if (!fp) {
-        fprintf(stderr, "ERROR: cannot open file '%s'\n", path);
-        return NULL;
-    }
-
-    // Find the file size by seeking to the end; ftell reports -1 on error,
-    // which must not be fed to malloc as a huge unsigned size.
-    long endPos = -1;
-    if (fseek(fp, 0, SEEK_END) == 0)
-        endPos = ftell(fp);
-    if (endPos < 0) {
-        fprintf(stderr, "ERROR: cannot read file '%s'\n", path);
-        fclose(fp);
-        return NULL;
-    }
-    rewind(fp);
-    const u32 fileSz = (u32)endPos;
-
-    char *buf = malloc(fileSz);
-    if (!buf) {
-        fprintf(stderr, "ERROR: ins. memory\n");
-        fclose(fp);
-        return NULL;
-    }
-
-    // Read the whole file as a single item so a short read counts as failure.
-    if (fread(buf, fileSz, 1, fp) != 1) {
-        fprintf(stderr, "ERROR: cannot read file '%s'\n", path);
-        free(buf);
-        fclose(fp);
-        return NULL;
-    }
-
-    fclose(fp);
-    return buf;
-}
-
-// Load the ELF file at path into a newly malloc'd ElfStruct and return it.
-// Returns NULL, after printing a message to stderr, if allocation fails, the
-// file cannot be loaded, or it does not start with the ELF magic bytes.
-// The ELF header, every section header and every symbol have their
-// endianness swapped in place inside the file buffer. Pointers to the
-// .shstrtab, .symtab and .strtab section headers are recorded when those
-// sections exist and are left NULL otherwise.
-// The caller owns the result and must free both elf->fileBuf and elf.
-// NOTE(review): LoadFile does not report the file size, so nothing here
-// verifies that the buffer is large enough for the headers being read.
-ElfStruct *LoadElfFile(char *path)
-{
-    ElfStruct *elf = malloc(sizeof(ElfStruct));
-    if (!elf) {
-        fprintf(stderr, "LoadElfFile: ins. memory\n");
-        return NULL;
-    }
-    memset(elf, 0, sizeof(ElfStruct));
-
-    elf->fileBuf = LoadFile(path);
-    if (!elf->fileBuf) {
-        free(elf);
-        return NULL;
-    }
-
-    if (memcmp(elf->fileBuf, "\177ELF", 4)) {
-        fprintf(stderr, "LoadElfFile: %s is not an ELF file\n", path);
-        // Release the file buffer as well as the struct that points to it.
-        free(elf->fileBuf);
-        free(elf);
-        return NULL;
-    }
-
-    // Swap ELF file header
-    elf->ehdr = SwapEhdr((Elf32_Ehdr *)elf->fileBuf);
-
-    u32 i;
-    Elf32_Shdr *shdr;
-    char *sname;
-
-    // Swap each ELF section header
-    for (i = 0; i < elf->ehdr->e_shnum; i++) {
-        shdr = GetSection(elf, i);
-        SwapShdr(shdr);
-    }
-
-    // Record .shstrtab header if it exists
-    if (elf->ehdr->e_shstrndx != SHN_UNDEF) {
-        elf->shStrTabShdr = GetSection(elf, elf->ehdr->e_shstrndx);
-    }
-
-    // Search for .symtab and .strtab headers and record them if found.
-    // GetSectionName returns NULL for unnamed sections or when there is no
-    // .shstrtab, in which case neither header is recorded.
-    for (i = 0; i < elf->ehdr->e_shnum; i++) {
-        shdr = GetSection(elf, i);
-        sname = GetSectionName(elf, shdr->sh_name);
-        if (sname) {
-            if (!strcmp(sname, ".symtab")) {
-                elf->symTabShdr = shdr;
-            } else if (!strcmp(sname, ".strtab")) {
-                elf->strTabShdr = shdr;
-            }
-        }
-    }
-
-    // Fix endianness in symbol table
-    SwapSymbolTable(elf);
-
-    return elf;
-}
-
-// Perform every escape-sequence replacement from escapeMaps on the string in
-// .strtab that strTabPtr points to, then write the result back over the
-// original, padding the leftover space with NUL bytes.
-//   strTabPtr    - NUL-terminated symbol name inside the .strtab buffer
-//   symNameBufSz - number of bytes available at strTabPtr, including the NUL
-// Terminates the process with EXIT_FAILURE if an allocation fails or if the
-// replaced name would be longer than the original.
-void ResolveEscapeSequences(char *strTabPtr, u32 symNameBufSz)
-{
-    // Example:
-    //   __ct__20Container____esc_lt_8MyStruct____esc_gt_FUi8MyStruct
-    //   __ct__20Container<8MyStruct>FUi8MyStruct
-
-    // Working copy of the name; replaced by a fresh string after each rule.
-    char *current = malloc(symNameBufSz);
-    if (!current) {
-        fprintf(stderr, "ERROR: malloc failed in ResolveEscapeSequences\n");
-        exit(EXIT_FAILURE);
-    }
-    strncpy(current, strTabPtr, symNameBufSz); // copy original name
-
-    // Apply each rule to the output of the previous one.
-    const u32 numEscSeqs = sizeof(escapeMaps) / sizeof(escapeMaps[0]);
-    for (u32 i = 0; i < numEscSeqs; i++) {
-        char *next = str_replace(current, escapeMaps[i].escapeSeq, escapeMaps[i].replaceChar);
-        free(current);
-        if (!next) {
-            fprintf(stderr, "ERROR: malloc failed in str_replace\n");
-            exit(EXIT_FAILURE);
-        }
-        current = next;
-    }
-
-    // we should never be making the strtab bigger
-    if (strlen(current) > strlen(strTabPtr)) {
-        fprintf(stderr, "ERROR: the replacement symbol name %s is larger than the original name %s\n", current, strTabPtr);
-        free(current);
-        exit(EXIT_FAILURE);
-    }
-
-    // write the result back to .strtab; strncpy pads any extra space with NUL
-    strncpy(strTabPtr, current, symNameBufSz);
-    free(current);
-}
-
-// Entry point: patch_strtab path/to/o_file.o
-// Loads the given relocatable ELF object, resolves the escape sequences in
-// every named symbol, and writes the patched .strtab section back to the
-// same file in place.
-// Returns EXIT_SUCCESS on success, or when the object has no .symtab or
-// .strtab section and so has nothing to patch; returns EXIT_FAILURE on bad
-// usage, load failure, a non-ET_REL file, or any I/O error.
-int main(int argc, char *argv[])
-{
-    if (argc != 2) {
-        fprintf(stderr, "usage: ./patch_strtab path/to/o_file.o\n");
-        return EXIT_FAILURE;
-    }
-
-    ElfStruct *elf = LoadElfFile(argv[1]);
-    if (!elf) {
-        fprintf(stderr, "ERROR: failed to load '%s' as an ELF\n", argv[1]);
-        return EXIT_FAILURE;
-    }
-
-    int status = EXIT_FAILURE;
-    FILE *fp = NULL;
-    u32 numSyms;
-    u32 i;
-
-    if (elf->ehdr->e_type != ET_REL) {
-        fprintf(stderr, "ERROR: '%s' is not an ET_REL ELF\n", argv[1]);
-        goto done;
-    }
-
-    // Without both tables there are no symbol names to rewrite; leave the
-    // file untouched rather than dereferencing a missing section header.
-    if (!elf->symTabShdr || !elf->strTabShdr) {
-        status = EXIT_SUCCESS;
-        goto done;
-    }
-
-    numSyms = GetNumberOfSymbols(elf);
-    for (i = 0; i < numSyms; i++) {
-        Elf32_Sym *sym = GetSymbol(elf, i);
-        if (!sym) {
-            fprintf(stderr, "ERROR: couldn't get symbol\n");
-            goto done;
-        }
-        // symName is a pointer into the file buffer at its .strtab section;
-        // GetName returns NULL for unnamed symbols (st_name == 0).
-        char *symName = GetName(elf, sym->st_name);
-        if (symName) {
-            ResolveEscapeSequences(symName, strlen(symName) + 1);
-        }
-    }
-
-    // Write the patched .strtab section back to the file
-    fp = fopen(argv[1], "rb+");
-    if (!fp) {
-        fprintf(stderr, "ERROR: failed to open '%s' to perform the patch\n", argv[1]);
-        goto done;
-    }
-
-    {
-        const char *patchedStrTab = elf->fileBuf + elf->strTabShdr->sh_offset;
-        const u32 strTabSize = elf->strTabShdr->sh_size;
-
-        if (fseek(fp, elf->strTabShdr->sh_offset, SEEK_SET) != 0 ||
-            fwrite(patchedStrTab, 1, strTabSize, fp) != strTabSize) {
-            fprintf(stderr, "ERROR: failed to fwrite the patched .strtab section to '%s'\n", argv[1]);
-            goto done;
-        }
-    }
-
-    // Buffered data may only hit the disk at close, so check fclose as well.
-    if (fclose(fp) != 0) {
-        fp = NULL;
-        fprintf(stderr, "ERROR: failed to fwrite the patched .strtab section to '%s'\n", argv[1]);
-        goto done;
-    }
-    fp = NULL;
-    status = EXIT_SUCCESS;
-
-done:
-    if (fp)
-        fclose(fp);
-    free(elf->fileBuf);
-    free(elf);
-    return status;
-}
diff --git a/tools/postprocess/postprocess.py b/tools/postprocess/postprocess.py new file mode 100644 index 0000000..ab2d912 --- /dev/null +++ b/tools/postprocess/postprocess.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 + +BANNER = """ +# This script is the culmination of three patches supporting decompilation +# with the CodeWarrior compiler. +# - riidefi, 2020 +# +# postprocess.py [args] file +# +# 1) Certain versions have a bug where the ctor alignment is ignored and set incorrectly. +# This option is enabled with -fctor-realign, and disabled by default with -fno-ctor-realign +# +# 2) Certain C++ symbols cannot be assembled normally. +# To support the buildsystem, a simple substitution system has been devised +# +# ?<ID> -> CHAR +# +# IDs (all irregular symbols in mangled names): +# 0: < +# 1: > +# 2: @ +# 3: \\ +# 4: , +# 5: - +# +# This option is enabled with -fsymbol-fixup, and disabled by default with -fno-symbol-fixup +# +# 3) CodeWarrior versions below 2.3 used a different scheduler model. +# The script can currently adjust function epilogues with the old_stack option. 
+# -fprologue-fixup=[default=none, none, old_stack] +""" + +import struct + +# Substitutions +substitutions = ( + ('<', '____esc_lt'), + ('>', '____esc_gt'), + ('@', '____esc_at'), + ('\\', '____esc_bs'), + (',', '____esc_cm'), + ('-', '____esc_hy') +) + +def format(symbol): + for sub in substitutions: + symbol = symbol.replace(sub[0], sub[1]) + + return symbol + +def decodeformat(symbol): + for sub in substitutions: + symbol = symbol.replace(sub[1], sub[0]) + + return symbol + +# Stream utilities + +def read_u8(f): + return struct.unpack("B", f.read(1))[0] + +def read_u32(f): + return struct.unpack(">I", f.read(4))[0] + +def read_u16(f): + return struct.unpack(">H", f.read(2))[0] + +def write_u32(f, val): + f.write(struct.pack(">I", val)) + +class ToReplace: + def __init__(self, position, dest, src_size): + self.position = position # Where in file + self.dest = dest # String to patch + self.src_size = src_size # Pad rest with zeroes + + # print("To replace: %s %s %s" % (self.position, self.dest, self.src_size)) + +def read_string(f): + tmp = "" + c = 0xff + while c != 0x00: + c = read_u8(f) + if c != 0: + tmp += chr(c) + return tmp + +def ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg): + patch_align_ofs = [] + + for i in range(nSecHeader): + f.seek(ofsSecHeader + i * 0x28) + ofsname = read_u32(f) + if not ofsname: continue + + back = f.tell() + + f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10) + ofsShST = read_u32(f) + f.seek(ofsShST + ofsname) + name = read_string(f) + if name == ".ctors" or name == ".dtors": + patch_align_ofs.append(ofsSecHeader + i * 0x28 + 0x20) + + f.seek(back) + + return patch_align_ofs + +SHT_PROGBITS = 1 +SHT_STRTAB = 3 + +def impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup): + result = [] + + f.seek(0x20) + ofsSecHeader = read_u32(f) + f.seek(0x30) + nSecHeader = read_u16(f) + idxSegNameSeg = read_u16(f) + secF = False # First instance the section names + + # Header: 0x32: + patch_align_ofs = [] + + 
if do_ctor_realign: + patch_align_ofs = ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg) + + for i in range(nSecHeader): + f.seek(ofsSecHeader + i * 0x28) + sh_name = read_u32(f) + sh_type = read_u32(f) + + if sh_type == SHT_STRTAB and do_symbol_fixup: + if not secF: + secF = True + f.seek(ofsSecHeader + i * 0x28 + 0x10) + ofs = read_u32(f) + size = read_u32(f) + + f.seek(ofs) + string = "" + str_spos = ofs + for i in range(ofs, ofs+size): + c = read_u8(f) + if c == 0: + if len(string): + fixed = decodeformat(string) + if fixed != string: + result.append(ToReplace(str_spos, fixed, len(string))) + string = "" + str_spos = i+1 + else: + string += chr(c) + else: + f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10) + ofsShST = read_u32(f) + f.seek(ofsShST + sh_name) + name = read_string(f) + + if name == ".text" and do_old_stack: + f.seek(ofsSecHeader + i * 0x28 + 0x10) + ofs = read_u32(f) + size = read_u32(f) + + # We assume + # 1) Only instructions are in the .text section + # 2) These instructions are 4-byte aligned + assert ofs != 0 + assert ofs % 4 == 0 + assert size % 4 == 0 + + f.seek(ofs) + + mtlr_pos = 0 + + # (mtlr position, blr position) + epilogues = [] + + for _ in range(ofs, ofs+size, 4): + it = f.tell() + instr = read_u32(f) + + # Skip padding + if instr == 0: continue + + # Call analysis is not actually required + # No mtlr will exist without a blr; mtctr/bctr* is used for dynamic dispatch + + # FUN_A: + # li r3, 0 + # blr <---- No mtlr, move onto the next function + # FUN_B: + # ; complex function, stack manip + # mtlr r0 <---- Expect a blr + # addi r1, r1, 24 + # blr <---- Confirm patch above + + # mtlr alias for mtspr + if instr == 0x7C0803A6: + assert mtlr_pos == 0 + mtlr_pos = it + # blr + elif instr == 0x4E800020: + if mtlr_pos: + epilogues.append((mtlr_pos, it)) + mtlr_pos = 0 + + + # Check for a lone mtlr + assert mtlr_pos == 0 + + # Reunify mtlr/blr instructions, shifting intermediary instructions up + for mtlr_pos, blr_pos in 
epilogues: + # Check if we need to do anything + if mtlr_pos + 4 == blr_pos: continue + + # As the processor can only hold 6 instructions at once in the pipeline, + # it's unlikely for the mtlr be shifted up more instructions than that--usually, + # only one: + # mtlr r0 + # addi r1, r1, 24 + # blr + assert blr_pos - 4 > mtlr_pos + assert blr_pos - mtlr_pos <= 6 * 4 + + print("Patching old epilogue: %s %s" % (mtlr_pos, blr_pos)) + + f.seek(mtlr_pos) + mtlr = read_u32(f) + + for it in range(mtlr_pos, blr_pos - 4, 4): + f.seek(it + 4) + next_instr = read_u32(f) + f.seek(it) + write_u32(f, next_instr) + + f.seek(blr_pos - 4) + write_u32(f, mtlr) + + return (result, patch_align_ofs) + +def postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup): + patches = impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup) + + f.seek(0) + source_bytes = list(f.read()) + for patch in patches[0]: + assert len(patch.dest) <= patch.src_size + for j in range(patch.src_size): + if j >= len(patch.dest): + c = 0 + else: + c = ord(patch.dest[j]) + source_bytes[patch.position + j] = c + + # Patch ctor align + nP = 0 + for p in patches[1]: + print("Patching ctors") + source_bytes[p + 0] = 0 + source_bytes[p + 1] = 0 + source_bytes[p + 2] = 0 + source_bytes[p + 3] = 4 + nP += 1 + if nP > 1: + print("Patched ctors + dtors") + + f.seek(0) + f.write(bytes(source_bytes)) + +def frontend(args): + inplace = "" + do_ctor_realign = False + do_old_stack = False + do_symbol_fixup = False + + for arg in args: + if arg.startswith('-f'): + negated = False + if arg.startswith('-fno-'): + negated = True + arg = arg[len('-fno-'):] + else: + arg = arg[len('-f'):] + + if arg == 'ctor_realign': + do_ctor_realign = not negated + elif arg == 'symbol-fixup': + do_symbol_fixup = not negated + elif arg.startswith('prologue-fixup='): + do_old_stack = arg[len('prologue-fixup='):] == 'old_stack' + else: + print("Unknown argument: %s" % arg) + elif arg.startswith('-'): + print("Unknown 
argument: %s. Perhaps you meant -f%s?" % (arg, arg)) + else: + if inplace: + print("Cannot process %s. Only one source file may be specified." % arg) + else: + inplace = arg + + if not inplace: + print("A file must be specified!") + return + + try: + postprocess_elf(open(inplace, 'rb+'), do_ctor_realign, do_old_stack, do_symbol_fixup) + except FileNotFoundError: + print("Cannot open file %s" % inplace) + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print(BANNER) + else: + frontend(sys.argv[1:]) |